MongoDB percentage query with multiple $project stages in aggregate - mongodb

I am new to MongoDB. I am looking for expert views about the query I wrote. basically I am calculating the percentage of skills that were in demand in the past 90days. The Query I have written gives the desired output but I feel like this query can be optimized. Kindly point out my mistakes so I can optimize this query with a better understanding of MongoDB.
Sample Document:
{
"_id":{
"$oid":"630a2ba9fe850ebc2d2f4a25"
},
"category":{
"name":"SQL",
"path":",web development,",
"id":{
"$oid":"62fe35f3f1793d2014bfe05f"
}
},
"createdAt":{
"$date":{
"$numberLong":"1661610921812"
}
}
}
My Query:
const SkillsInDemand = await Job.aggregate([
{
$match: { createdAt: { $gte: new Date((new Date().getTime() - (90 * 24 * 60 * 60 * 1000))) }}
},
{
$group: { _id: "$category.name",count: { $sum: 1 }}
},
{
$project: {count: "$count",total: {$sum:count} }
},
{
$project: { percentage: { $multiply: [{ $divide: ["$count", "$total" ] }, 100] }}
},
{
$sort: { percentage: -1 }
}
])
$group stage output:
[
{ _id: 'Flutter', count: 1 },
{ _id: 'SQL', count: 2 },
{ _id: 'Python', count: 9 }
]
1st $project stage output:
[
{ _id: 'Python', count: 9, total: 12 },
{ _id: 'Flutter', count: 1, total: 12 },
{ _id: 'SQL', count: 2, total: 12 }
]
2nd $project stage output:
[
{ _id: 'Python', percentage: 75 },
{ _id: 'Flutter', percentage: 8.333333333333332 },
{ _id: 'SQL', percentage: 16.666666666666664 }
]
Final Output:
[
{ _id: 'Python', percentage: 75 },
{ _id: 'SQL', percentage: 16.666666666666664 },
{ _id: 'Flutter', percentage: 8.333333333333332 }
]

Related

MongoDB add grand total to sortByCount() in an aggregation pipeline

I have grouped all the users by country, but I would also like to have a row showing the grand total (users are tagged to a single country in our use case).
Data Model / Sample Input
The collection is filled with objects representing a country (name) and each contains a list of user objects in an array under users.
{ _id: ObjectId("..."),
name: 'SG',
type: 'COUNTRY',
increment: 200,
users:
[ ObjectId("..."),
ObjectId("..."),
...
Query
db.collection.aggregate([{$match:{type:"COUNTRY"}},{$unwind:"$users"},{$sortByCount:"$name"}])
Current Results
{ _id: 'SG', count: 76 }
{ _id: 'IN', count: 6 }
{ _id: 'US', count: 4 }
{ _id: 'FR', count: 3 }
{ _id: 'UK', count: 2 }
{ _id: 'RU', count: 1 }
{ _id: 'CO', count: 1 }
{ _id: 'DK', count: 1 }
{ _id: 'ID', count: 1 }
{ _id: 'PH', count: 1 }
Expected Results
{ _id: 'SG', count: 76 }
{ _id: 'IN', count: 6 }
{ _id: 'US', count: 4 }
{ _id: 'FR', count: 3 }
{ _id: 'UK', count: 2 }
{ _id: 'RU', count: 1 }
{ _id: 'CO', count: 1 }
{ _id: 'DK', count: 1 }
{ _id: 'ID', count: 1 }
{ _id: 'PH', count: 1 }
{ _id: null, count: 96 } <<< TOTAL COUNT ADDED
Any tips to achieve this without resorting to complex or dirty tricks?
You can also try using $facet to calculate counts by country name and total count, and then combine them together. Something like this:
db.collection.aggregate([
{
$match: {
type: "COUNTRY"
}
},
{
"$unwind": "$users"
},
{
"$facet": {
"groupCountByCountry": [
{
"$sortByCount": "$name"
}
],
"totalCount": [
{
"$group": {
"_id": null,
"count": {
"$sum": 1
}
}
}
]
}
},
{
"$project": {
array: {
"$concatArrays": [
"$groupCountByCountry",
"$totalCount"
]
}
}
},
{
"$unwind": "$array"
},
{
"$replaceRoot": {
"newRoot": "$$ROOT.array"
}
}
])
Here's the playground link.
I recommend just doing this in memory as the alternative is "hacky" but in order to achieve this in Mongo you just need to group all documents, add a new documents and unwind again, like so:
db.collection.aggregate([
{
$group: {
_id: null,
roots: {
$push: "$$ROOT"
},
sum: {
$sum: "$count"
}
}
},
{
$addFields: {
roots: {
"$concatArrays": [
"$roots",
[
{
_id: null,
count: "$sum"
}
]
]
}
}
},
{
$unwind: "$roots"
},
{
$replaceRoot: {
newRoot: "$roots"
}
}
])
Mongo Playground

MongoDB - Query calculation and group multiple items

Let's say I have this data:
{"Plane":"5546","Time":"55.0", City:"LA"}
{"Plane":"5548","Time":"25.0", City:"CA"}
{"Plane":"5546","Time":"6.0", City:"LA"}
{"Plane":"5548","Time":"5.0", City:"CA"}
{"Plane":"5555","Time":"15.0", City:"XA"}
{"Plane":"5555","Time":"8.0", City:"XA"}
and more but I just visualize the data
I want to calculate and group all the time and plane, this is expected output:
{"_id:":["5546","LA"],"Sum":2,"LateRate":1,"Prob"0.5}
The sum is sum all the time, Late is sum all the time with time > "15" and Prob is Late/Sum
The code I have tried but it still is missing something:
db.Collection.aggregate([
{
$project: {
Sum: 1,
Late: {
$cond: [{ $gt: ["$Time", 15.0] }, 1, 0]
},
prob:1
}
},
{
$group:{
_id:{Plane:"$Plane", City:"$City"},
Sum: {$sum:1},
Late: {$sum: "$Late"}
}
},
{
$addFields: {
prob: {
"$divide": [
"$Late",
"$Sum"
]
}
}
},
])
db.collection.aggregate([
{
$project: {
Time: 1,
Late: {
$cond: [
{
$gt: [
{
$toDouble: "$Time"
},
15.0
]
},
"$Time",
0
]
},
prob: 1,
Plane: 1,
City: 1
}
},
{
$group: {
_id: {
Plane: "$Plane",
City: "$City"
},
Sum: {
$sum: {
"$toDouble": "$Time"
}
},
Late: {
$sum: {
$toDouble: "$Late"
}
}
}
},
{
$addFields: {
prob: {
"$divide": [
"$Late",
"$Sum"
]
}
}
}
])
Project limits the fields passed to the next stage
On string, you cannot perform all relational/arithmetic operations
Playground

Aggregate Hourly Weekly Monthly Yearly data in mongodb

Q1. I need to filter data by created date and driverId then need to sum up the total by Hourly, Weekly, Monthly, and Yearly. I already checked with other solutions but it doesn't help much.
Sample Data:
[
{
id: "1",
created : "2022-01-04T03:22:18.739Z",
completed: "2022-01-06T03:53:28.463Z",
driverId: "B-72653",
total: 15,
},
{
id: "2",
created : "2022-01-01T03:22:18.739Z",
completed: "2022-01-02T03:53:28.463Z",
driverId: "B-72653",
total: 33
},
{
id: "3",
created : "2021-08-26T01:22:18.739Z",
completed: "2021-08-26T09:53:28.463Z",
driverId: "B-72653",
total: 43
},
{
id: "4",
created : "2021-03-26T02:22:18.739Z",
completed: "2021-03-26T07:53:28.463Z",
driverId: "B-73123",
total: 35
},
]
Response needed:
{
Hourly:[10,5,5,6,7,8,4,5,6,3,44,2,1,2,3,44,5,6,75,4,3,2,1], // 24 Hours (Each Hour Total)
Weekly:[10,30,34,45,56,67,78], // 7 days (Each Day Total)
Monthly:[10,30,34,45,56,67,78,55,44,33,22,12], // 12 Months (Each Month Total)
Yearly: [10,30] // Year Total (Each Year Total)
}
Q2. How can we filter nested array by-products > brand id and get the sum of product price by its id and filter by Hourly, Weekly, Monthly, Yearly?.
You can use $group with _id being $hour / $week / $month / $year to aggregate the sum. $push them into an array to get your expected result.
Use $facet to repeat the process for all 4 cases.
db.collection.aggregate([
{
"$facet": {
"Hourly": [
{
$group: {
_id: {
$hour: "$created"
},
total: {
$sum: "$total"
}
}
},
{
$sort: {
_id: 1
}
},
{
$group: {
_id: null,
result: {
$push: {
hour: "$_id",
total: "$total"
}
}
}
}
],
Weekly: [
{
$group: {
_id: {
"$week": "$created"
},
total: {
$sum: "$total"
}
}
},
{
$sort: {
_id: 1
}
},
{
$group: {
_id: null,
result: {
$push: {
week: "$_id",
total: "$total"
}
}
}
}
],
Monthly: [
{
$group: {
_id: {
$month: "$created"
},
total: {
$sum: "$total"
}
}
},
{
$sort: {
_id: 1
}
},
{
$group: {
_id: null,
result: {
$push: {
month: "$_id",
total: "$total"
}
}
}
}
],
Yearly: [
{
$group: {
_id: {
$year: "$created"
},
total: {
$sum: "$total"
}
}
},
{
$sort: {
_id: 1
}
},
{
$group: {
_id: null,
result: {
$push: {
year: "$_id",
total: "$total"
}
}
}
}
]
}
},
{
"$addFields": {
"Hourly": {
"$arrayElemAt": [
"$Hourly",
0
]
},
"Weekly": {
"$arrayElemAt": [
"$Weekly",
0
]
},
"Monthly": {
"$arrayElemAt": [
"$Monthly",
0
]
},
"Yearly": {
"$arrayElemAt": [
"$Yearly",
0
]
}
}
},
{
"$addFields": {
"Hourly": "$Hourly.result",
"Weekly": "$Weekly.result",
"Monthly": "$Monthly.result",
"Yearly": "$Yearly.result"
}
}
])
Here is the Mongo playground for your reference.

Mongo Facet Aggregation with Sum

Trying to figure out something simple in this aggregation. The field "totalArrests" under metadata is coming back 0. It's not able to sum this field from the previous stage for some reason. Please advise.
const agg = await KID.aggregate([
{
$group: {
_id: "$source", // group by this
title: { "$last": "$title"},
comments: { "$last": "$comments"},
body: { "$last": "$body"},
date: { "$last": "$date"},
media: { "$last": "$media"},
source: { "$last": "$source"},
count: { "$sum": 1},
arrestCount: { "$sum": "$arrested"},
rescuedCount: { "$sum": "$rescued"},
}
},
// sorting
{
$sort: {date: sort}
},
// facets for paging
{
$facet: {
metadata: [
{ $count: "total" }, // Returns a count of the number of documents at this stage
{ $addFields: {
page: page,
limit: 30,
totalArrests: {$sum: "$arrestCount"}
}},
],
kids: [ { $skip: (page-1)*30 }, { $limit: 30 } ]
}
},
]);
Here is a sample document in the collection.
[
{
_id: 5e8b922aaf5ccf5ac588398c,
counter: 4,
date: 2017-01-01T17:00:00.000Z,
name: 'Steven Tucker',
arrested: 1,
rescued: 0,
country: 'US',
state: 'NH',
comments: 'Sex trafficking of a minor',
source: 'https://www.justice.gov/opa/pr/new-hampshire-man-indicted-sex-trafficking-minor-connection-interstate-prostitution',
title: 'New ....',
body: 'Steven Tucker, 31, ....',
__v: 0,
media: {
title: 'New Hampshire Man Indicted for Sex ...',
open_graph: [Object],
twitter_card: [Object],
favicon: 'https://www.justice.gov/sites/default/files/favicon.png'
},
id: 5e8b922aaf5ccf5ac588398c,
text: 'New Hampshire Man Indicted',
utcDate: '2017-01-01T12:00'
}
]
$count will only provide you the count for number of documents and escapes all the other things.
So, You have to use one more pipeline in $facet in order to get the documents.
{ $facet: {
metadata: [
{ $group: {
_id: null,
total: { $sum: 1 },
totalArrested: { $sum: "$arrestCount" }
}},
{ $project: {
total: 1,
totalArrested: 1,
page: page,
limit: 30,
hasMore: { $gt: [{ $ceil: { $divide: ["$total", 30] }}, page] }
}}
],
kids: [{ $skip: (page-1) * 30 }, { $limit: 30 }]
}}

MongoDB multiple nested groups

I have documents in mongodb like this
{
_id: "5cfed55974c7c52ecc33ada8",
name: "Garona",
realm: "Blackrock",
faction: "Horde",
race: "Orc",
class: "Rogue",
guild: "",
level: 33,
lastSeen: "2019-06-10T00:00:00.000Z",
__v: 0
},
{
_id: "5cfed55974c7c52ecc33ade8",
name: "Muradin",
realm: "Alleria",
faction: "Alliance",
race: "Dwarf",
class: "Warrior",
guild: "Stormstout Brewing Co",
level: 42,
lastSeen: "2019-06-11T00:00:00.000Z",
__v: 0
}
What I'm trying to do, is to group by a fields and get a sum of it. So far I figured it out to do it for one field at once like so
{
$group: {
_id: {
classes: '1',
class: '$class'
},
total: { $sum: 1 }
}
},
{
$group: {
_id: '$_id.classes',
total: { $sum: '$total' },
classes: {
$push: {
class: '$_id.class',
total: '$total'
}
}
}
}
Which produces something like this
{
_id: "1",
total: 40,
classes: [
{
class: "Warrior",
total: 17
},
{
class: "Rogue",
total: 23
}
}
But I want to do it for more than one field at once, so that I can get an output like this.
{
_id: "1",
total: 40,
classes: [
{
class: "Warrior",
total: 17
},
{
class: "Rogue",
total: 23
},
factions: [
{
faction: "Alliance",
total: 27
},
{
faction: "Horde",
total: 13
}
}
No I'm wondering if it is even possible to do it in one query in an easy way or if I would be better to do a seperate query for each field.
You can do this by using the $facet aggregation stage
Processes multiple aggregation pipelines within a single stage on the same set of input documents. Each sub-pipeline has its own field in the output document where its results are stored as an array of documents.
I only slightly modified your original pipeline, and then just copied it for the 'factions' field.
The last 3 stages in my solution aren't really necessary, they just clean up the output a little bit.
You can probably take it from here, good luck.
db.collection.aggregate([
{
"$facet": {
"classes": [
{
$group: {
_id: "$class",
total: {
$sum: 1
}
}
},
{
$group: {
_id: null,
total: {
$sum: "$total"
},
"classes": {
$push: {
class: "$_id",
total: "$total"
}
}
}
}
],
"factions": [
{
$group: {
_id: "$faction",
total: {
$sum: 1
}
}
},
{
$group: {
_id: null,
total: {
$sum: "$total"
},
"factions": {
$push: {
faction: "$_id",
total: "$total"
}
}
}
}
]
}
},
{
$unwind: "$classes"
},
{
$unwind: "$factions"
},
{
$project: {
"classes._id": 0,
"factions._id": 0
}
}
])
Output
[
{
"classes": {
"classes": [
{
"class": "Warrior",
"total": 1
},
{
"class": "Rogue",
"total": 1
}
],
"total": 2
},
"factions": {
"factions": [
{
"faction": "Alliance",
"total": 1
},
{
"faction": "Horde",
"total": 1
}
],
"total": 2
}
}
]