Use the aggregate method with $unwind to parse out the scores array, followed by a $group and a $sum - mongodb

What I'm trying to do is add up the sum of the scores for any German Restaurant in Manhattan. I want to return the top 5 restaurants with their name and total score.
Here is the setup of the json data I'm working with:
{
"address": {
"building": "1007",
"coord": [ -73.856077, 40.848447 ],
"street": "Morris Park Ave",
"zipcode": "10462"
},
"borough": "Bronx",
"cuisine": "Bakery",
"grades": [
{ "date": { "$date": 1393804800000 }, "grade": "A", "score": 2 },
{ "date": { "$date": 1378857600000 }, "grade": "A", "score": 6 },
{ "date": { "$date": 1358985600000 }, "grade": "A", "score": 10 },
{ "date": { "$date": 1322006400000 }, "grade": "A", "score": 9 },
{ "date": { "$date": 1299715200000 }, "grade": "B", "score": 14 }
],
"name": "Morris Park Bake Shop",
"restaurant_id": "30075445"
}
I've tried multiple variations of this:
db.restaurants.aggregate([{$unwind: "$grades"}, {$group: {_id: {borough: "Manhattan", cuisine:"German"}, total:{$sum:"scores"}}}])
And this is what keeps getting returned. I'm new to MongoDB and I'm just not sure what I'm doing wrong.
{ "_id" : { "borough" : "Manhattan", "cuisine" : "German" }, "total" : 0 }

So here is an aggregation that might work assuming the average score is what is measured...
Query:
db.restaurants.aggregate([
{ $match: { "borough": "Manhattan", "cuisine": "German" } },
{ $unwind: "$grades" },
{ $group: {
"_id": "$name",
"avg_score": { $avg: "$grades.score" }
}
},
{ $sort: { "avg_score": -1 } },
{ $limit: 5 }
]).pretty()
Pipeline Explanation:
match on borough and cuisine to filter to only desirable conditions
Unwind the grades array to flatten structure for review
group on the restaurant name and get the average score
Sort on the average score descending to get the top scores at the top of the list
Limit output to the top 5 restaurants.
EDIT:
Sort by sum instead of average...
db.restaurants.aggregate([
{ $match: { "borough": "Manhattan", "cuisine": "German" } },
{ $unwind: "$grades" },
{ $group: {
"_id": "$name",
"avg_score": { $avg: "$grades.score" },
"sum_score": { $sum: "$grades.score" }
}
},
{ $sort: { "sum_score": -1 } },
{ $limit: 5 }
]).pretty()

Related

mongodb query to find the min price in array of objects

My documents:
[{
"title": "lenovo x-100",
"brand": "lenovo",
"category": "laptops",
"variant": [{
"price": 30000,
"RAM": "4GB",
"storage": "256GB",
"screen": "full hd",
"chip": "i3"
}, {
"price": 35000,
"RAM": "8GB",
"storage": "512GB",
"screen": "full hd",
"chip": "i5"
}, {
"price": 40000,
"RAM": "12GB",
"storage": "2TB",
"screen": "uhd",
"chip": "i7"
}],
"salesCount": 32,
"buysCount": 35,
"viewsCount": 60
},
{
"title": "samsung12",
"brand": "lenovo",
"category": "mobile phones",
"variant": [{
"price": 11000,
"RAM": "4GB",
"ROM": "32GB"
}, {
"price": 16000,
"RAM": "6GB",
"ROM": "64GB"
}, {
"price": 21000,
"RAM": "8GB",
"ROM": "128GB"
}],
"salesCount": 48,
"buysCount": 39,
"viewsCount": 74
}
Expected output
{
_id:"lenovo",
minPrice:1100
}
I have tried this method of aggregation
[{
$match: {
brand: 'lenovo'
}
}, {
$group: {
_id: '$brand',
prices: {
$min: '$variant.price'
}
}
}, {
$unwind: {
path: '$prices'
}
}, {
$group: {
_id: '$_id',
minPrice: {
$min: '$prices'
}
}
}]
I want to find the minimum price based on the brand. This query returns the expected output, but is there a better way to get the same result? Using the $unwind operator is quite expensive in the sense that it may take longer to execute. Hoping for a positive response. Thanks in advance.
You can use $reduce to replace the $unwind stage and the second $group stage.
1. $match
2. $group - Push variant.price into a new array, which results in a nested array of arrays.
3. $project:
3.1. $reduce - Used to flatten the nested array from step 2 by joining the inner arrays into one with $concatArrays.
3.2. $min - Select the minimum value from the result of 3.1.
db.collection.aggregate([
{
$match: {
brand: "lenovo"
}
},
{
$group: {
_id: "$brand",
prices: {
$push: "$variant.price"
}
}
},
{
$project: {
_id: 1,
minPrice: {
$min: {
"$reduce": {
"input": "$prices",
"initialValue": [],
"in": {
"$concatArrays": [
"$$value",
"$$this"
]
}
}
}
}
}
}
])
Sample Mongo Playground

Retrieve highest score for each game using aggregate in MongoDB

I am working on a database of various games and I want to design a query that returns the top scorer from each game with specific player details.
The document structure is as follows:
db.gaming_system.insertMany(
[
{
"_id": "01",
"name": "GTA 5",
"high_scores": [
{
"hs_id": 1,
"name": "Harry",
"score": 6969
},
{
"hs_id": 2,
"name": "Simon",
"score": 8574
},
{
"hs_id": 3,
"name": "Ethan",
"score": 4261
}
]
},
{
"_id": "02",
"name": "Among Us",
"high_scores": [
{
"hs_id": 1,
"name": "Harry",
"score": 926
},
{
"hs_id": 2,
"name": "Simon",
"score": 741
},
{
"hs_id": 3,
"name": "Ethan",
"score": 841
}
]
}
]
)
I have created a query using aggregate which returns the name of game and the highest score for that game as follows
db.gaming_system.aggregate(
{ "$project": { "maximumscore": { "$max": "$high_scores.score" }, name:1 } },
{ "$group": { "_id": "$_id", Name: { $first: "$name" }, "Highest_Score": { "$max": "$maximumscore" } } },
{ "$sort" : { "_id":1 } }
)
The output from my query is as follows:
{ "_id" : "01", "Name" : "GTA 5", "Highest_Score" : 8574 }
{ "_id" : "02", "Name" : "Among Us", "Highest_Score" : 926 }
I want to generate output which also provides the name of player and "hs_id" of that player who has the highest score for each game as follows:
{ "_id" : "01", "Name" : "GTA 5", "Top_Scorer" : "Simon", "hs_id": 2, "Highest_Score" : 8574 }
{ "_id" : "02", "Name" : "Among Us", "Top_Scorer" : "Harry", "hs_id": 1, "Highest_Score" : 926 }
What should be added to my query using aggregate pipeline?
[
{
$unwind: "$high_scores" //unwind the high_scores, so you can then sort
},
{
$sort: {
"high_scores.score": -1 //sort the high_scores, irrelevant of game, because we are going to group in next stage
}
},
{
//now group them by _id, take the name and top scorer from $first (which is the first in that group as sorted by score in descending order
$group: {
_id: "$_id",
name: {
$first: "$name"
},
Top_Scorer: {
$first: "$high_scores"
}
}
}
]

Aggregation at each document level mongodb

I have a list of documents like this
[{
"_id": "5dbc95f921d7625303fe2369",
"name": "John",
"itemsPurchased": [{
"offer": "o1",
"items": ["p1"]
},{
"offer": "o1",
"items": ["p1"]
},
{
"offer": "o1",
"items": ["p2"]
},
{
"offer": "o2",
"items": ["p1"]
}, {
"offer": "o7",
"items": ["p1"]
}
]
},
{
"_id": "zbc95f921d7625303fe2363",
"name": "Doe",
"itemsPurchased": [{
"offer": "o1",
"items": ["p11"]
},{
"offer": "o1",
"items": ["p11"]
},
{
"offer": "o2",
"items": ["p13"]
},
{
"offer": "o1",
"items": ["p22"]
},
{
"offer": "o2",
"items": ["p11"]
}, {
"offer": "o3",
"items": ["p11"]
}
]
}
]
And I am trying to compute the unique offers on unique products for each customer, expecting the result to be like:
[
{
"_id": "5dbc95f921d7625303fe2369",
"name": "John",
"offersAndProducts": {
"o1":2,
"o2":2,
"o3":1
},
{
"_id": "zbc95f921d7625303fe2363",
"name": "Doe",
"offersAndProducts": {
"o1":2,
"o2":1,
"o7":1
}
]
I want to apply aggregations per document. After performing $unwind on itemsPurchased, I applied $group on items and then on offer to eliminate the duplication:
{
"$group" : {
"_id" : {
"item" : {
"$arrayElemAt" : [
"$itemsPurchased.item",
0.0
]
},
"count" : {
"$sum" : 1.0
},
"offer" : "$itemsPurchased.offer"
}
}
}
then,
{
"$group" : {
"_id" : "$_id.offer",
"count" : {
"$sum" : 1.0
}
}
}
this gives the array of products and offers for all documents:
[
{o1:4,o2:3,o3:1,o7:1}
]
But I need it at the document level.
I tried $addFields, but the $unwind and $match operators give an invalid error.
Is there any other way of achieving this?
Generally speaking, it's an anti-pattern to $unwind an array and then to $group on the original _id since most operations can be done on the array directly, in a single stage. Here is what such a stage would look like:
{$addFields:{
offers:{$arrayToObject:{
$map:{
input:{$setUnion:"$itemsPurchased.offer"},
as:"o",
in:[
"$$o",
{$size:{$setUnion:{$let:{
vars:{items:{$filter:{
input:"$itemsPurchased",
cond:{$eq:["$$this.offer","$$o"]}
}}},
in:{$reduce:{
input:"$$items",
initialValue:[],
in:{$concatArrays:["$$value","$$items.items"]}
}}
}}}
}]
}
}}
}}
What this does is create an array where each element is a two-element array (a format that $arrayToObject can convert to an object, where the first element is the key name and the second is the value). The input is the unique set of offers; for each offer we accumulate an array of products, get rid of duplicates (with $setUnion), and then get the size of the result. What this produces on your input is this:
"offers" : {
"o1" : 2,
"o2" : 2,
"o3" : 1
}
You need to run $unwind and $group twice. To count only unique items you can use $addToSet. To build your keys dynamically you need to use $arrayToObject:
db.collection.aggregate([
{
$unwind: "$itemsPurchased"
},
{
$unwind: "$itemsPurchased.items"
},
{
$group: {
_id: {
_id: "$_id",
offer: "$itemsPurchased.offer"
},
name: { $first: "$name" },
items: { $addToSet: "$itemsPurchased.items" }
}
},
{
$group: {
_id: "$_id._id",
name: { $first: "$name" },
offersAndProducts: { $push: { k: "$_id.offer", v: { $size: "$items" } } }
}
},
{
$project: {
_id: 1,
name: 1,
offersAndProducts: { $arrayToObject: "$offersAndProducts" }
}
}
])
Mongo Playground

Grouping different amounts together in MongoDB

I have a set of objects, each with the same description but with different amounts:
{
{
"_id": "101",
"description": "DD from my employer1",
"amount": 1000.33
},
{
"_id": "102",
"description": "DD from my employer1",
"amount": 1000.34
},
{
"_id": "103",
"description": "DD from my employer1",
"amount": 1000.35
},
{
"_id": "104",
"description": "DD from employer1",
"amount": 5000.00
},
{
"_id": "105",
"description": "DD from my employer2",
"amount": 2000.33
},
{
"_id": "106",
"description": "DD from my employer2",
"amount": 2000.33
},
{
"_id": "107",
"description": "DD from my employer2",
"amount": 2000.33
}
}
Below, I am able to group them using the description:
{
{
"$group": {
"_id": {
"description": "$description"
},
"count": {
"$sum": 1
},
"_id": {
"$addToSet": "$_id"
}
}
},
{
"$match": {
"count": {
"$gte": 3
}
}
}
}
Is there a way to include all the amounts in the group (_ids: 101, 102, and 103 plus 105,106,107) even if they have a small difference, but exclude the bonus amount, which in the sample above is _id 104?
I don't believe it could be done in a group stage, but is there something that could be done at a later stage that could group _ids 101, 102 and 103 together and exclude _id 104? Basically, I want MongoDB to ignore the small differences in 101, 102, 103 and group them together since they are paychecks coming from the same employer.
I have been working with $stdDevPop, but can't get a solid formula down.
I am looking for a simple array output of just the _ids.
{
"result": [
"101",
"102",
"103",
"105",
"106",
"107"
]
}
You can do this by doing some math on the "amount" to round it down to the nearest 1000 and use that as the grouping _id:
db.collection.aggregate([
{ "$group": {
"_id": {
"$subtract": [
{ "$trunc": "$amount" },
{ "$mod": [
{ "$trunc": "$amount" },
1000
]}
]
},
"results": { "$push": "$_id" }
}},
{ "$redact": {
"$cond": {
"if": { "$gt": [ { "$size": "$results" }, 1 ] },
"then": "$$KEEP",
"else": "$$PRUNE"
}
}},
{ "$unwind": "$results" },
{ "$group": {
"_id": null,
"results": { "$push": "$results" }
}}
])
If your MongoDB is older than 3.2 then you would just need to use a long form with $mod of what $trunc is doing. And if your MongoDB is older than 2.6 then rather than $redact you would $match. So in the longer form this is:
db.collection.aggregate([
{ "$group": {
"_id": {
"$subtract": [
{ "$subtract": [
"$amount",
{ "$mod": [ "$amount", 1 ] }
]},
{ "$mod": [
{ "$subtract": [
"$amount",
{ "$mod": [ "$amount", 1 ] }
]},
1000
]}
]
},
"results": { "$push": "$_id" },
"count": { "$sum": 1 }
}},
{ "$match": { "count": { "$gt": 1 } } },
{ "$unwind": "$results" },
{ "$group": {
"_id": null,
"results": { "$push": "$results" }
}}
])
Either way the output is just the _id values whose amounts grouped to the boundaries with a count of more than one.
{ "_id" : null, "results" : [ "105", "106", "107", "101", "102", "103" ] }
You could either add a $sort in there or live with sorting the result array in client code.
db.yourDBNameHere.aggregate( [
{ $match: { "amount" : { $lt : 5000 } } },
{ $project: { _id: 1 } },
])
That will grab only the _id of every transaction with an amount less than 5000.

How to compare and count each value of element with condition in mongoDB pipeline after unwinding?

This is my command I ran in tools->command
{
aggregate : "hashtags",
pipeline:
[
{$unwind:"$time"},
{$match:{"$time":{$gte:NumberInt(1450854385), $lte:NumberInt(1450854385)}}},
{$group:{"_id":"$word","count":{$sum:1}}}
]
}
which gave us this result
Response from server:
{
"result": [
{
"_id": "dear",
"count": NumberInt(1)
},
{
"_id": "ghost",
"count": NumberInt(1)
},
{
"_id": "rat",
"count": NumberInt(1)
},
{
"_id": "police",
"count": NumberInt(1)
},
{
"_id": "bugs",
"count": NumberInt(3)
},
{
"_id": "dog",
"count": NumberInt(2)
},
{
"_id": "batman",
"count": NumberInt(9)
},
{
"_id": "ear",
"count": NumberInt(1)
}
],
"ok": 1
}
The documents are in collection 'hashtags'
The documents inserted are as shown below
1.
{
"_id": ObjectId("567a483bf0058ed6755ab3de"),
"hash_count": NumberInt(1),
"msgids": [
"1583"
],
"time": [
NumberInt(1450854385)
],
"word": "ghost"
}
2.
{
"_id": ObjectId("5679485ff0058ed6755ab3dd"),
"hash_count": NumberInt(1),
"msgids": [
"1563"
],
"time": [
NumberInt(1450788886)
],
"word": "dear"
}
3.
{
"_id": ObjectId("567941aaf0058ed6755ab3dc"),
"hash_count": NumberInt(9),
"msgids": [
"1555",
"1556",
"1557",
"1558",
"1559",
"1561",
"1562",
"1584",
"1585"
],
"time": [
NumberInt(1450787170),
NumberInt(1450787292),
NumberInt(1450787307),
NumberInt(1450787333),
NumberInt(1450787354),
NumberInt(1450787526),
NumberInt(1450787615),
NumberInt(1450855148),
NumberInt(1450855155)
],
"word": "batman"
}
4.
{
"_id": ObjectId("567939cdf0058ed6755ab3d9"),
"hash_count": NumberInt(3),
"msgids": [
"1551",
"1552",
"1586"
],
"time": [
NumberInt(1450785157),
NumberInt(1450785194),
NumberInt(1450856188)
],
"word": "bugs"
}
So I want to count the number of values in the field 'time' that fall between two limits,
such as this:
foreach word
{
foreach time
{
if((a<time)&&(time<b))
word[count]++
}
}
But my query just outputs the total size of the array 'time'.
What is the correct query?
For example, if the lower bound is 1450787615 and the upper bound is 1450855155, there are 3 values in 'time' for the word 'batman'.
The answer should be
{
"_id": "batman",
"count": NumberInt(3)
},
for batman. Thank you.
Use the following aggregation pipeline:
db.hashtags.aggregate([
{
"$match": {
"time": {
"$gte": 1450787615, "$lte": 1450855155
}
}
},
{ "$unwind": "$time" },
{
"$match": {
"time": {
"$gte": 1450787615, "$lte": 1450855155
}
}
},
{
"$group": {
"_id": "$word",
"count": {
"$sum": 1
}
}
}
])
For the given sample documents, this will yield:
/* 0 */
{
"result" : [
{
"_id" : "batman",
"count" : 3
},
{
"_id" : "dear",
"count" : 1
},
{
"_id" : "ghost",
"count" : 1
}
],
"ok" : 1
}