I'm struggling to understand how to query my data using MQL. My dataset looks a bit like this:
{
"_id": {
"$oid": "5dcadda84d59f2e0b0d56974"
},
"object_kind": "pipeline",
"object_attributes": {
"status": "success",
"created_at": "2019-11-12 16:28:22 UTC",
"variables": []
}
},
{
"_id": {
"$oid": "5dcadda84d59f2e0b0d56998"
},
"object_kind": "pipeline",
"object_attributes": {
"status": "failed",
"created_at": "2019-11-13 12:22:22 UTC",
"variables": []
}
}
I'm adding $eventDate using this in my aggregation, which works:
{
eventDate: { $dateFromString: {
dateString: {
$substr: [ "$object_attributes.created_at",0, 10 ]
}
}},
}
And I'm trying to turn it into this:
{
"eventDate": "2019-11-12",
"counts": {
"success": 1,
"failure": 0
}
},
{
"eventDate": "2019-11-13",
"counts": {
"success": 0,
"failure": 1
}
},
So far I can't seem to understand how to group the data twice, as if I group by "$eventDate" then I can't then group by status. Why can't I just group all docs from the same $eventDate into an array, without losing all the other fields?
It would be ideal if the success and failure fields which could be inferred from different statuses that appear in object_attributes.status
This can be done in several different ways, heres a quick example using a conditional sum:
db.collection.aggregate([
{
"$addFields": {
"eventDate": {
"$dateFromString": {
"dateString": {
"$substr": [
"$object_attributes.created_at",
0.0,
10.0
]
}
}
}
}
},
{
"$group": {
"_id": "$eventDate",
"success": {
"$sum": {
"$cond": [
{
"$eq": [
"$object_attributes.status",
"success"
]
},
1.0,
0.0
]
}
},
"failure": {
"$sum": {
"$cond": [
{
"$eq": [
"$object_attributes.status",
"failed"
]
},
1.0,
0.0
]
}
}
}
},
{
"$project": {
"eventDate": "$_id",
"counts": {
"success": "$success",
"failure": "$failure"
},
"_id": 0
}
}
]);
Related
I have a collection with documents in below format: (shown below 2 sample document)
1st doc:
{
"date": 20221101,
"time":1500,
"productCode": "toycar",
"purchaseHistory": [
{
"clientid": 123,
"status": "SUCCESS"
},
{
"clientid": 456,
"status": "FAILURE"
}
]
}
2nd doc:
{
"date": 20221101,
"time": 1500,
"productCode": "toycar",
"purchaseHistory": [
{
"clientid": 890,
"status": "SUCCESS"
},
{
"clientid": 678,
"status": "SUCCESS"
}
]
}
I want to query above and print output in below format where purchaseHistory.status = 'SUCCESS' and date = 20221101:
{productCode:"toycar", "time": 1500, "docCount": 2, "purchaseHistCount":3}
How can I achieve this?
I tried below:
db.products.aggregate({
$match : {date:20221101, 'purchaseHistory.status':'SUCCESS'},
"$group": {
"_id": {
"pc": "$productCode",
"time": "$time"
},
"docCount": {$sum :1}
}
})
Something like this maybe:
db.collection.aggregate([
{
$match: {
date: 20221101,
"purchaseHistory.status": "SUCCESS"
}
},
{
"$addFields": {
"purchaseHistory": {
"$filter": {
"input": "$purchaseHistory",
"as": "ph",
"cond": {
$eq: [
"$$ph.status",
"SUCCESS"
]
}
}
}
}
},
{
$group: {
_id: {
t: "$time",
pc: "$productCode"
},
docCount: {
$sum: 1
},
purchaseHistCount: {
$sum: {
$size: "$purchaseHistory"
}
}
}
}
])
Explained:
Filter the matched documents.
Filter the purchaseHistory SUCCESS only.
Group the result to see count of matching documents & matching purchaseHistory.
Playground
My sample data:
{
"_id": "random_id_1",
"priority": "P1",
"owners": ["user-1", "user-2"],
},
{
"_id": "random_id_2",
"priority": "P1",
"owners": ["user-1", "user-2"],
},
{
"_id": "random_id_3",
"priority": "P2",
"owners": ["user-1", "user-2"],
},
I want to run an aggregation pipeline on the data involving match filters and grouping, also I want to limit the number of groups returned as well as the number of items in each group.
Essentially, if limit=2, limit_per_group=1, group_by=owner, priority=P1, I want the following results:
[
{
"data": [
{
"group_key": "user-1",
"total_items_in_group": 2,
"limited_items": [
{
"_id": "random_id_1",
"priority": "P1",
"owners": ["user-1", "user-2"],
},
],
},
{
"group_key": "user-2",
"total_items_in_group": 2,
"limited_items": [
{
"_id": "random_id_1",
"priority": "P1",
"owners": ["user-1", "user-2"],
},
],
},
]
},
{
"metadata": {
"total_items_matched": 2,
"total_groups": 2
}
},
]
Need some help on how to write an aggregation pipeline to get the required result.
My current query is as follows:
{
"$match": {
"priority": "P1"
}
},
{
"$facet": {
"data": [
{
$addFields: {
"group_by_owners": "$owners"
}
},
{
$unwind: "$group_by_owners"
},
{
$group: {
"_id": "$group_by_owners",
"total_items_in_group": {
$sum: 1
},
"items": {
$push: "$$ROOT"
}
}
},
{
$sort: {
"total": -1
}
},
{
$unset: "items.group_by_owners"
},
{
$project: {
"_id": 1,
"total_items_in_group": 1,
"limited_items": {
$slice: [
"$items",
1
]
}
}
},
{
"$limit": 2
}
],
"metadata": [
{
$count: "total_items_matched"
}
]
}
}
Mongo playground link
I am unable to calculate the total number of groups.
add new stage of $addfields at the end of pipeline
db.collection.aggregate([
{
"$match": {
"priority": "P1"
}
},
{
"$facet": {
"data": [
{
$addFields: {
"group_by_owners": "$owners"
}
},
{
$unwind: "$group_by_owners"
},
{
$group: {
"_id": "$group_by_owners",
"total_items_in_group": {
$sum: 1
},
"items": {
$push: "$$ROOT"
}
}
},
{
$sort: {
"total": -1
}
},
{
$unset: "items.group_by_owners"
},
{
$project: {
"_id": 0,
"group_key": "$_id",
"total_items_in_group": 1,
"limited_items": {
$slice: [
"$items",
1
]
}
}
},
{
"$limit": 2
}
],
"metadata": [
{
$count: "total_items_matched",
}
]
}
},
{
"$addFields": {
"metadata.total_groups": {
"$size": "$data"
}
}
}
])
https://mongoplayground.net/p/y5a0jvr6fxI
Currently can't figure out why one pipeline works and the other doesn't. I got both pipelines from MongoDB charts and they both returned something and displaying charts on MongoDBCharts. However, when I use them in my code, only the first pipeline returns something. I used the same data for all cases. Any suggestions would be greatly appreciated!
The first one doesn't filter the last 30 days (hard coded by Mongo), both pipelines are copied from Mongodb charts and are not altered.
[
{
"$addFields": {
"trigger_time": {
"$convert": {
"input": "$trigger_time",
"to": "date",
"onError": null
}
}
}
},
{
"$match": {
"event_type": {
"$nin": [
null,
"",
"AC Lost",
"Device Lost",
"logged into Database",
"logged into Nexus Database",
"logged out of Nexus Database",
"Low Battery"
]
}
}
},
{
"$addFields": {
"trigger_time": {
"$cond": {
"if": {
"$eq": [
{
"$type": "$trigger_time"
},
"date"
]
},
"then": "$trigger_time",
"else": null
}
}
}
},
{
"$addFields": {
"__alias_0": {
"hours": {
"$hour": "$trigger_time"
}
}
}
},
{
"$group": {
"_id": {
"__alias_0": "$__alias_0"
},
"__alias_1": {
"$sum": 1
}
}
},
{
"$project": {
"_id": 0,
"__alias_0": "$_id.__alias_0",
"__alias_1": 1
}
},
{
"$project": {
"y": "$__alias_1",
"x": "$__alias_0",
"_id": 0
}
},
{
"$sort": {
"x.hours": 1
}
},
{
"$limit": 5000
}
]
The second one
[
{
"$addFields": {
"trigger_time": {
"$convert": {
"input": "$trigger_time",
"to": "date",
"onError": null
}
}
}
},
{
"$match": {
"event_type": {
"$nin": [
null,
"",
"AC Lost",
"Device Lost",
"logged into Database",
"logged into Nexus Database",
"logged out of Nexus Database",
"Low Battery"
]
},
"trigger_time": {
"$gte": {
"$date": "2021-03-29T08:35:47.804Z"
}
}
}
},
{
"$addFields": {
"trigger_time": {
"$cond": {
"if": {
"$eq": [
{
"$type": "$trigger_time"
},
"date"
]
},
"then": "$trigger_time",
"else": null
}
}
}
},
{
"$addFields": {
"__alias_0": {
"hours": {
"$hour": "$trigger_time"
}
}
}
},
{
"$group": {
"_id": {
"__alias_0": "$__alias_0"
},
"__alias_1": {
"$sum": 1
}
}
},
{
"$project": {
"_id": 0,
"__alias_0": "$_id.__alias_0",
"__alias_1": 1
}
},
{
"$project": {
"y": "$__alias_1",
"x": "$__alias_0",
"_id": 0
}
},
{
"$sort": {
"x.hours": 1
}
},
{
"$limit": 5000
}
]
I end up solving my own problem. After a bit of digging and asking.
Node.js does some funny things with Mongodb when it comes to using '$date', that's why the pipeline didn't work.
The resolve was to remove '$date' and pass in a date object. For my case,
"trigger_time": {
"$gte": new Date("2021-03-29T08:35:47.804Z")
}
Data in mongo
[{
"_id": "5d71d1432f7c8151c58c4481",
"payment": {
"transactions": [
{
"_id": "5d71d1ff2f7c8151c58c44cf",
"method": "paytm",
"amount": 100,
"paymentOn": "2019-09-06T03:26:44.959Z"
},
{
"_id": "5d71d1ff2f7c8151c58c44ce",
"method": "cash",
"amount": 650,
"paymentOn": "2019-09-06T03:26:55.531Z"
}
],
"status": "partial"
},
"customer": "5d66c434c24f2b1fb6772014",
"order": {
"orderNumber": "WP-ORD-06092019-001",
"total": 770,
"balance": 20
}
},
{
"_id": "5d71d1432f7c8151c58c4481",
"payment": {
"transactions": [
{
"_id": "5d71d1ff2f7c8151c58c44cf",
"method": "paytm",
"amount": 100,
"paymentOn": "2019-09-06T03:26:44.959Z"
}
],
"status": "partial"
},
"customer": "5d66c434c24f2b1fb6772014",
"order": {
"orderNumber": "WP-ORD-06092019-001",
"total": 200,
"balance": 100
}
}]
I want to aggregate payments by method.
So the result would look like below:
Output:
Paytm: 200
Cash : 650
Unpaid(Balance): 120
I have tried:
[
{
'$unwind': {
'path': '$payment.transactions',
'preserveNullAndEmptyArrays': true
}
}, {
'$project': {
'amount': '$payment.transactions.amount',
'method': '$payment.transactions.method'
}
}, {
'$group': {
'_id': '$method',
'amount': {
'$sum': '$amount'
}
}
}
]
But how to include balance calculation as well
Using the above dataset, use the aggregate pipeline for calculation using aggregate as:
db.collection.aggregate([
{
$facet: {
paidAmounts: [
{ '$unwind': { 'path': '$payment.transactions', 'preserveNullAndEmptyArrays': true } },
{
$group: {
_id: "$payment.transactions.method",
amount: {
$sum: "$payment.transactions.amount"
}
}
}
],
leftAmounts: [
{
$group: {
_id: null,
balance: {
$sum: "$order.balance"
}
}
}
]
}
}
])
giving output:
here leftAmounts has left balance and paidAmounts having grouped paid data on basis of payment type
[
{
"leftAmounts": [
{
"_id": null,
"balance": 120
}
],
"paidAmounts": [
{
"_id": "cash",
"amount": 650
},
{
"_id": "paytm",
"amount": 200
}
]
}
]
Working solution : https://mongoplayground.net/p/7IWELKKMsWe
db.collection.aggregate([
{
"$unwind": "$payment.transactions"
},
{
"$group": {
"_id": "$_id",
"balance": {
"$first": "$order.balance"
},
"paytm": {
"$sum": {
"$cond": [
{
"$eq": [
"$payment.transactions.method",
"paytm"
]
},
"$payment.transactions.amount",
0
]
}
},
"cash": {
"$sum": {
"$cond": [
{
"$eq": [
"$payment.transactions.method",
"cash"
]
},
"$payment.transactions.amount",
0
]
}
}
}
},
{
"$group": {
"_id": null,
"balance": {
"$sum": "$balance"
},
"cash": {
"$sum": "$cash"
},
"paytm": {
"$sum": "$paytm"
}
}
}
])
I have a collection with documents that look similar to this:
[
{
"_id": ObjectId("..."),
"date": ISODate("..."),
"type": "TypeA",
"color": "ColorA",
"soldFor": 12.15
},
{
"_id": ObjectId("..."),
"date": ISODate("..."),
"type": "TypeA",
"color": "ColorB",
"soldFor": 13.15
},
{
"_id": ObjectId("..."),
"date": ISODate("..."),
"type": "TypeB",
"color": "ColorA",
"soldFor": 12.15
},
{
"_id": ObjectId("..."),
"date": ISODate("..."),
"type": "TypeB",
"color": "ColorB",
"soldFor": 12.15
}
]
I know that this is not a good way to store such information, but unfortunately I have no influence in that.
What I need to get out of the collection is something like this:
[
2017: {
typeA: {
colorA: {
sum: 125.00
},
colorB: {
sum: 110.00
}
},
typeB: {
colorA: {
sum: 125.000
}
}
},
2016: {
typeA: {
colorB: {
sum: 125.000
}
}
}
]
At the moment I have two group stages that give me everything grouped by year, but I have no clue how to get the two other sub-groups. Building the sum would be a nice to have, but I am certain that I can figure out how that would be done in a group.
So far my pipeline looks like this:
[
{
$group: {
_id: { type: '$type', color: '$color', year: { $year: '$date' } },
docs: {
$push: '$$ROOT'
}
}
},
{
$group: {
_id: { year: '$_id.year' },
docs: {
$push: '$$ROOT'
}
}
}
]
which results in something like this:
[
{
"_id": {
"year": 2006
},
"docs": {
"_id": {
"type": "typeA",
"color": "colorA",
"year": 2006
},
"docs": [
{
... root document
}
]
}
},
{
"_id": {
"year": 2016
},
"docs": [
{
"_id": {
"type": "typeA",
"color": "colorB",
"year": 2016
},
"docs": [
{
... root document
}
]
}
... more docs with three keys in id
]
}
]
Help is much appreciated!
Using a cohort of operators found in MongoDB 3.4.4 and newer, i.e. $addFields, $arrayToObject and $replaceRoot, you can compose a pipeline like the following to get the desired result:
[
{ "$group": {
"_id": {
"year": { "$year": "$date" },
"type": "$type",
"color": "$color"
},
"count": { "$sum": "$soldFor" }
} },
{ "$group": {
"_id": {
"year": "$_id.year",
"type": "$_id.type"
},
"counts": {
"$push": {
"k": "$_id.color",
"v": { "sum": "$count" }
}
}
} },
{ "$addFields": {
"counts": { "$arrayToObject": "$counts" }
} },
{ "$group": {
"_id": "$_id.year",
"counts": {
"$push": {
"k": "$_id.type",
"v": "$counts"
}
}
} },
{ "$addFields": {
"counts": { "$arrayToObject": "$counts" }
} },
{ "$group": {
"_id": null,
"counts": {
"$push": {
"k": { "$substr": ["$_id", 0, -1 ]},
"v": "$counts"
}
}
} },
{ "$replaceRoot": {
"newRoot": {
"$mergeObjects": [
{ "$arrayToObject": "$counts" },
"$$ROOT"
]
}
} },
{ "$project": { "counts": 0 } }
]