Currently can't figure out why one pipeline works and the other doesn't. I got both pipelines from MongoDB charts and they both returned something and displaying charts on MongoDBCharts. However, when I use them in my code, only the first pipeline returns something. I used the same data for all cases. Any suggestions would be greatly appreciated!
The first one doesn't filter the last 30 days (hard coded by Mongo), both pipelines are copied from Mongodb charts and are not altered.
[
{
"$addFields": {
"trigger_time": {
"$convert": {
"input": "$trigger_time",
"to": "date",
"onError": null
}
}
}
},
{
"$match": {
"event_type": {
"$nin": [
null,
"",
"AC Lost",
"Device Lost",
"logged into Database",
"logged into Nexus Database",
"logged out of Nexus Database",
"Low Battery"
]
}
}
},
{
"$addFields": {
"trigger_time": {
"$cond": {
"if": {
"$eq": [
{
"$type": "$trigger_time"
},
"date"
]
},
"then": "$trigger_time",
"else": null
}
}
}
},
{
"$addFields": {
"__alias_0": {
"hours": {
"$hour": "$trigger_time"
}
}
}
},
{
"$group": {
"_id": {
"__alias_0": "$__alias_0"
},
"__alias_1": {
"$sum": 1
}
}
},
{
"$project": {
"_id": 0,
"__alias_0": "$_id.__alias_0",
"__alias_1": 1
}
},
{
"$project": {
"y": "$__alias_1",
"x": "$__alias_0",
"_id": 0
}
},
{
"$sort": {
"x.hours": 1
}
},
{
"$limit": 5000
}
]
The second one
[
{
"$addFields": {
"trigger_time": {
"$convert": {
"input": "$trigger_time",
"to": "date",
"onError": null
}
}
}
},
{
"$match": {
"event_type": {
"$nin": [
null,
"",
"AC Lost",
"Device Lost",
"logged into Database",
"logged into Nexus Database",
"logged out of Nexus Database",
"Low Battery"
]
},
"trigger_time": {
"$gte": {
"$date": "2021-03-29T08:35:47.804Z"
}
}
}
},
{
"$addFields": {
"trigger_time": {
"$cond": {
"if": {
"$eq": [
{
"$type": "$trigger_time"
},
"date"
]
},
"then": "$trigger_time",
"else": null
}
}
}
},
{
"$addFields": {
"__alias_0": {
"hours": {
"$hour": "$trigger_time"
}
}
}
},
{
"$group": {
"_id": {
"__alias_0": "$__alias_0"
},
"__alias_1": {
"$sum": 1
}
}
},
{
"$project": {
"_id": 0,
"__alias_0": "$_id.__alias_0",
"__alias_1": 1
}
},
{
"$project": {
"y": "$__alias_1",
"x": "$__alias_0",
"_id": 0
}
},
{
"$sort": {
"x.hours": 1
}
},
{
"$limit": 5000
}
]
I end up solving my own problem. After a bit of digging and asking.
Node.js does some funny things with Mongodb when it comes to using '$date', that's why the pipeline didn't work.
The resolve was to remove '$date' and pass in a date object. For my case,
"trigger_time": {
"$gte": new Date("2021-03-29T08:35:47.804Z")
}
Related
Employee has multiple employeeActions, the employeeActions data looks like this:
[
{
"email": "one#gmail.com",
"companyRegNo": 105,
"event": {
"created": ISODate("2022-09-16T06:42:04.387Z"),
"desc": "COMPLETED_APPLICATIONS",
"note": "Direct apply"
}
},
{
"email": "one#gmail.com",
"companyRegNo": 105,
"event": {
"created": ISODate("2022-09-20T06:42:42.761Z"),
"desc": "ASKED_TO_REVIEW",
}
},
{
"email": "two#gmail.com",
"companyRegNo": 227,
"event": {
"created": ISODate("2022-09-16T06:42:04.387Z"),
"desc": "COMPLETED_APPLICATIONS",
"note": "Direct apply",
}
},
{
"email": "two#gmail.com",
"companyRegNo": 227,
"event": {
"created": ISODate("2022-09-28T06:42:42.761Z"),
"desc": "ASKED_TO_REVIEW",
}
},
{
"email": "three#gmail.com",
"companyRegNo": 157,
"event": {
"created": ISODate("2022-09-16T06:42:04.387Z"),
"desc": "COMPLETED_APPLICATIONS",
"note": "Direct apply",
}
},
{
"email": "four#gmail.com",
"companyRegNo": 201,
"deleted": true,
"event": {
"created": ISODate("2022-09-15T06:42:42.761Z"),
"desc": "COMPLETED_APPLICATIONS",
}
},
]
I need to write an aggregation query to get all email ids where the employee action of the user
- Does not have an ASKED_TO_REVIEW event created before '2022-09-25'
- deleted is either false or does not exist
The out put should have only
{"email": "one#gmail.com"}
{"email": "three#gmail.com"}
The below match and project query did not work
db.collection.aggregate([
{
"$match": {
"$and": [
{
"deleted": {
"$ne": true
}
},
{
"$or": [
{
"$and": [
{
"event.name": {
"$eq": "ASKED_TO_REVIEW"
}
},
{
"event.created": {
"$lt": ISODate("2022-09-25")
}
}
]
},
{
"event.name": {
"$ne": "ASKED_TO_REVIEW"
}
}
]
}
]
}
},
{
"$project": {
"email": 1,
"_id": 0
}
}
])
How do i go about this?
You need to group the events by email and then apply your filtering logic to those groups, something like this:
db.collection.aggregate([
{
"$group": {
"_id": "$email",
"field": {
"$push": "$$ROOT"
}
}
},
{
"$match": {
$expr: {
"$eq": [
0,
{
"$size": {
"$filter": {
"input": "$field",
"as": "item",
"cond": {
"$or": [
{
"$and": [
{
"$eq": [
{
"$getField": {
"field": "desc",
"input": "$$item.event"
}
},
"ASKED_TO_REVIEW"
]
},
{
"$lt": [
{
"$getField": {
"field": "created",
"input": "$$item.event"
}
},
ISODate("2022-09-25")
]
}
]
},
{
"$eq": [
{
"$getField": {
"field": "deleted",
"input": "$$item"
}
},
true
]
}
]
}
}
}
}
]
}
}
},
{
"$project": {
email: "$_id",
"_id": 0
}
}
])
Playground link.
Figured out the working query. After grouping by email, $elemMatch needs to be used for the and condition between "event.desc" and "event.created"
db.collection.aggregate([
{
"$group": {
"_id": "$email",
"field": {
"$push": "$$ROOT"
}
}
},
{
"$match": {
"$and": [
{
"field.deleted": {
"$ne": true
}
},
{
"$or": [
{
"field": {
"$elemMatch": {
"event.desc": "ASKED_TO_REVIEW",
"event.created": {
"$lt": ISODate("2022-09-25")
}
}
}
},
{
"field.event.desc": {
"$ne": "ASKED_TO_REVIEW"
}
}
]
}
]
}
},
{
"$project": {
email: "$_id",
"_id": 0
}
}
])
Playground Link
I am currently using the following query but hitting a small issue - It's really slow.
Is there any way that mongooseJS can either:
a) split this into smaller queries and run it in segments
b) a better way to query this data.
My query
[
{
$project: {
"location": 1,
"year": { "$year": "$timestamp" }
}
},
{
$match: { "year": 2020 }
},
{
$group: {
"_id": "$location.coordinates",
"count": { "$sum": 1 }
}
},
{
$project: {
"_id": 0,
"count": 1,
"location.coordinates": {
"$map": {
"input": "$_id",
"in": { "$toString": "$$this" }
}
}
}
},
{
$sort: { count: -1 }
}
])
in NodeJS I am querying it like I have M10 atlas so no issue running the query it works in Mongodb compass
router.get('/pastlocation', (req, res) =>{
const pastlocation = require('mongoose').model('pastlistenerslocation');
pastlocation.aggregate([
{
$project: {
"location": 1,
"year": { "$year": "$timestamp" }
}
},
{
$match: { "year": 2020 }
},
{
$group: {
"_id": "$location.coordinates",
"count": { "$sum": 1 }
}
},
{
$project: {
"_id": 0,
"count": 1,
"location.coordinates": {
"$map": {
"input": "$_id",
"in": { "$toString": "$$this" }
}
}
}
},
{ $sort: { count: -1 } }
])).allowDiskUse(true).exec(function(err,result) {
if(err){console.log(err)}
res.json(result);
});
});
Data looks like this:
{
"UUID": "9B3640AC-3AA9-4313-94DB-9DFFEC7362E1",
"location": {
"coordinates": [115.8802288492358, -31.92553401927643],
"type": "Point"
},
"timestamp": {
"$date": "2020-11-04T13:20:58.224Z"
},
"__v": 0
}
Im trying to get multiple count values only from multiple documents in a collection which looks like this,( basically I want to get a count of how many are from the 4 directions)
{
"empno": 1500,
"province": "North"
}
{
"empno": 1600,
"province": "West"
}
early I found a solution and implemented following query;
([
{ "$facet": {
"N": [
{ "$match": { "province": "North" }},
{ "$count": "N" }
],
"E": [
{ "$match": { "province": "East" }},
{ "$count": "E" }
],
"S": [
{ "$match": { "province": "South" }},
{ "$count": "S" }
],
"W": [
{ "$match": { "province": "West" }},
{ "$count": "W" }
]
}},
{ "$project": {
"N": { "$arrayElemAt": ["$N.N", 0] },
"E": { "$arrayElemAt": ["$E.E", 0] },
"S": { "$arrayElemAt": ["$S.S", 0] },
"W": { "$arrayElemAt": ["$W.W", 0] },
}}
])
The output I get is
{ N: 1, W: 1 }
How can I get the values only like without the keys and also I want the blank fields that are empty to be with a 0. Like this;
{1, 0, 0, 1}
Facet
Query
group by null, is the thing that you needed to add to get the count
Test code here
db.collection.aggregate([
{
"$facet": {
"g0": [
{
"$match": {
"province": {
"$eq": "North"
}
}
},
{
"$group": {
"_id": null,
"count": {
"$sum": 1
}
}
},
{
"$project": {
"_id": 0
}
}
],
"g1": [
{
"$match": {
"province": {
"$eq": "East"
}
}
},
{
"$group": {
"_id": null,
"count": {
"$sum": 1
}
}
},
{
"$project": {
"_id": 0
}
}
],
"g2": [
{
"$match": {
"province": {
"$eq": "South"
}
}
},
{
"$group": {
"_id": null,
"count": {
"$sum": 1
}
}
},
{
"$project": {
"_id": 0
}
}
],
"g3": [
{
"$match": {
"province": {
"$eq": "West"
}
}
},
{
"$group": {
"_id": null,
"count": {
"$sum": 1
}
}
},
{
"$project": {
"_id": 0
}
}
]
}
},
{
"$set": {
"data": {
"$map": {
"input": {
"$objectToArray": "$$ROOT"
},
"in": {
"$cond": [
{
"$eq": [
"$$d.v",
[]
]
},
0,
{
"$let": {
"vars": {
"m": {
"$arrayElemAt": [
"$$d.v",
0
]
}
},
"in": "$$m.count"
}
}
]
},
"as": "d"
}
}
}
},
{
"$project": {
"data": 1
}
}
])
Group
Query
group is used instead of facet (facet is like 1 aggregation per field)
each group have its index (from the array), some indexes will be missing (because no documents exist)
add a zero-data field that has all indexes and count=0 (see bellow)
add to zero-data, the data found (the ones that existed in the collection,and we have groups for them) the rest keep the count=0
Test code here
db.collection.aggregate([
{
"$group": {
"_id": {
"$switch": {
"branches": [
{
"case": {
"$eq": [
"$province",
"North"
]
},
"then": {
"index": 0,
"province": "North"
}
},
{
"case": {
"$eq": [
"$province",
"East"
]
},
"then": {
"index": 1,
"province": "East"
}
},
{
"case": {
"$eq": [
"$province",
"South"
]
},
"then": {
"index": 2,
"province": "South"
}
},
{
"case": {
"$eq": [
"$province",
"West"
]
},
"then": {
"index": 3,
"province": "West"
}
}
],
"default": {
"index": 5
}
}
},
"count": {
"$sum": 1
}
}
},
{
"$group": {
"_id": null,
"data": {
"$push": {
"index": "$_id.index",
"province": "$province",
"count": "$count"
}
}
}
},
{
"$project": {
"_id": 0
}
},
{
"$set": {
"zero-data": [
{
"index": 0,
"count": 0
},
{
"index": 1,
"count": 0
},
{
"index": 2,
"count": 0
},
{
"index": 3,
"count": 0
}
]
}
},
{
"$set": {
"data": {
"$reduce": {
"input": "$zero-data",
"initialValue": [],
"in": {
"$let": {
"vars": {
"all_data": "$$value",
"d": "$$this"
},
"in": {
"$let": {
"vars": {
"found_data": {
"$filter": {
"input": "$data",
"cond": {
"$eq": [
"$$d.index",
"$$d1.index"
]
},
"as": "d1"
}
}
},
"in": {
"$concatArrays": [
"$$all_data",
[
{
"$cond": [
{
"$eq": [
"$$found_data",
[]
]
},
{
"index": "$$d.index",
"count": 0
},
{
"$arrayElemAt": [
"$$found_data",
0
]
}
]
}
]
]
}
}
}
}
}
}
}
}
},
{
"$project": {
"data": {
"$map": {
"input": "$data",
"in": "$$this.count"
}
}
}
}
])
I'm struggling to understand how to query my data using MQL. My dataset looks a bit like this:
{
"_id": {
"$oid": "5dcadda84d59f2e0b0d56974"
},
"object_kind": "pipeline",
"object_attributes": {
"status": "success",
"created_at": "2019-11-12 16:28:22 UTC",
"variables": []
}
},
{
"_id": {
"$oid": "5dcadda84d59f2e0b0d56998"
},
"object_kind": "pipeline",
"object_attributes": {
"status": "failed",
"created_at": "2019-11-13 12:22:22 UTC",
"variables": []
}
}
I'm adding $eventDate using this in my aggregation, which works:
{
eventDate: { $dateFromString: {
dateString: {
$substr: [ "$object_attributes.created_at",0, 10 ]
}
}},
}
And I'm trying to turn it into this:
{
"eventDate": "2019-11-12",
"counts": {
"success": 1,
"failure": 0
}
},
{
"eventDate": "2019-11-13",
"counts": {
"success": 0,
"failure": 1
}
},
So far I can't seem to understand how to group the data twice, as if I group by "$eventDate" then I can't then group by status. Why can't I just group all docs from the same $eventDate into an array, without losing all the other fields?
It would be ideal if the success and failure fields which could be inferred from different statuses that appear in object_attributes.status
This can be done in several different ways, heres a quick example using a conditional sum:
db.collection.aggregate([
{
"$addFields": {
"eventDate": {
"$dateFromString": {
"dateString": {
"$substr": [
"$object_attributes.created_at",
0.0,
10.0
]
}
}
}
}
},
{
"$group": {
"_id": "$eventDate",
"success": {
"$sum": {
"$cond": [
{
"$eq": [
"$object_attributes.status",
"success"
]
},
1.0,
0.0
]
}
},
"failure": {
"$sum": {
"$cond": [
{
"$eq": [
"$object_attributes.status",
"failed"
]
},
1.0,
0.0
]
}
}
}
},
{
"$project": {
"eventDate": "$_id",
"counts": {
"success": "$success",
"failure": "$failure"
},
"_id": 0
}
}
]);
I want to write a group by query to written active user and total count(both active and inactive) grouped by a date column in mongodb. I am able to run them as two separate scripts but how to retrieve the same information in one script
db.user.aggregate(
{
"$match": { 'phoneInfo.verifiedFlag': true}
},
{
"$project": {
yearMonthDayUTC: { $dateToString: { format: "%Y-%m-%d", date: "$createdOn" } }
}
},
{
"$group": {
"_id": {day: "$yearMonthDayUTC"},
count: {
"$sum": 1
}
}
},
{
$sort: {
"_id.day": 1,
}
})
You can use the $cond operator in your group to create a conditional count as follows (assuming the inactive/active values are in a field called status):
db.user.aggregate([
{ "$match": { 'phoneInfo.verifiedFlag': true} },
{
"$group": {
"_id": { "$dateToString": { "format": "%Y-%m-%d", "date": "$createdOn" } },
"total": { "$sum": 1 },
"active_count": {
"$sum": {
"$cond": [ { "$eq": [ "$status", "active" ] }, 1, 0 ]
}
},
"inactive_count": {
"$sum": {
"$cond": [ { "$eq": [ "$status", "inactive" ] }, 1, 0 ]
}
}
}
},
{ "$sort": { "_id": 1 } }
])
For different values you can adapt the following pipeline:
db.user.aggregate([
{ "$match": { 'phoneInfo.verifiedFlag': true} },
{
"$group": {
"_id": {
"day": {
"$dateToString": {
"format": "%Y-%m-%d",
"date": "$createdOn"
}
},
"status": { "$toLower": "$status" }
},
"count": { "$sum": 1 }
}
},
{
"$group": {
"_id": "$_id.day",
"counts": {
"$push": {
"status": "$_id.status",
"count": "$count"
}
}
}
},
{ "$sort": { "_id": 1 } }
])