How to average the summed up values in mongodb? - mongodb

Using MongoDB 2.4.8,
I have the following records
{
"category" : "TOYS",
"price" : 12,
"status" : "online",
"_id" : "35043"
}
{
"category" : "TOYS",
"price" : 13,
"status" : "offline",
"_id" : "35044"
}
{
"category" : "TOYS",
"price" : 22,
"status" : "online",
"_id" : "35045"
}
{
"category" : "BOOKS",
"price" : 13,
"status" : "offline",
"_id" : "35046"
}
{
"category" : "BOOKS",
"price" : 17,
"status" : "online",
"_id" : "35047"
}
I want to find the average price of each category whose status is online and total price is more than 50.
I am not sure how to construct this query.
So far, I can construct a query that sums up the total price for each category whose status is online.
// Total price per category, counting only products that are online.
db.products.aggregate([
    // Keep only online products. $match takes the query document directly;
    // wrapping it in a second pair of braces is a syntax error.
    { "$match": { "status": "online" } },
    // Sum the prices of the remaining products within each category.
    { "$group": {
        "_id": "$category",
        "total_price": { "$sum": "$price" }
    } }
])
I am not sure how to add more stages to this query to get the averages I am looking for.

You can just add more stages to your aggregation pipeline. For example:
// Average of the per-category totals, for online products in categories
// whose total price exceeds 50. Runs against the "products" collection used
// in the question.
db.products.aggregate([
    // Only online products are considered.
    { $match: { status: "online" } },
    // Total price per category.
    { $group: {
        _id: "$category",
        total_price: { $sum: "$price" }
    } },
    // Drop categories whose total is 50 or less.
    { $match: { total_price: { $gt: 50 } } },
    // Constant _id: every surviving category lands in a single group, whose
    // avg_price is the average of the category totals.
    { $group: {
        _id: "1",
        avg_price: { $avg: "$total_price" }
    } }
]);
EDITED based on clarifications

You can calculate the average product price per category in the same $group step as the total, and then add an extra $match stage to limit the results to categories with a total of more than 50:
// Total and average price per category for online products, keeping only
// categories whose total exceeds 50. The stages are passed as a single
// pipeline array, which is the documented form and leaves room for an
// options argument.
db.products.aggregate([
    // Find matching products (can take advantage of index)
    { $match: {
        status: "online"
    }},
    // Calculate total and average per category
    { $group: {
        "_id": "$category",
        "total_price": { $sum: "$price" },
        "avg_price": { $avg: "$price" }
    }},
    // Limit results to categories with total price > 50
    { $match: {
        "total_price": { $gt: 50 }
    }}
])
Note that with your example data, there would be no matching results for $gt:50 (you could instead try with $gt:30 to get the "TOYS" category as a match with total price of 34).
Averaging total prices for matching categories
If you want to get the average price for the total prices of the categories matching the limit, you can add an extra $group step at the end:
// Calculate the average total price.
// This is a single pipeline stage, not a complete command.
{ $group: {
// A null _id places every incoming document in one group.
"_id": null,
"total_average_price": { $avg:"$total_price"}
}}
Note that this extra grouping is going to reduce everything down to one number (the total_average_price) which may or may not be what you expect. You might want to save the intermediate results before running the aggregation with the last group, or just calculate the average in your application code if there aren't a lot of numbers to sum up.

Related

Querying aggregates on subdocuments then grouping by field in parent document

I'm a noob when it comes to Mongo and I've been struggling to wrap my head around how to fetch data in the following fashion. I have a collection of order documents that contain some data such as an event_id and a subcollection (if that's the term) of issued_tickets. issued_tickets contains one to many subdocuments that contain fields such as name, date, etc. What I am trying to do is fetch the number of each type of issued tickets for each event_id in the parent document. So I would be wanting to do a count on each issued_tickets grouped by issued_tickets.name and then that goes up to the parent which is then summed and grouped on the parent's event_id.
Can anyone help me accomplish this? I keep spinning myself out on trying groupings and projections still.
Here is a sample document:
{
"_id" : ObjectId("5ce7335c1c666f000414f74a"),
"event_id" : ObjectId("5cb54f966668a9719ef6a103"),
"subtotal" : 3000,
"service_fee" : 760,
"processing_fee" : 143,
"total" : 3903,
"customer_id" : ObjectId("5ce7666c1c335f000414f747"),
"updated_at" : ISODate("2019-05-23T23:57:17.524Z"),
"created_at" : ISODate("2019-05-23T23:57:17.524Z"),
"ref" : "60d5fcf9-86c6-469b-b86b-315a9b55caca",
"issued_tickets" : [
{
"_id" : ObjectId("5ce7335c1c335f000414f666"),
"name" : "Tier 1",
"stub_name" : "Tier 1",
"price" : 1500,
"base_fee" : 200,
"perc_fee" : "0.12",
"access_code" : "163a1b9ee98338a8a4288a1c87446665",
"redeemed" : false
},
{
"_id" : ObjectId("5ce7335c1c335f0004146669"),
"name" : "Tier 2",
"stub_name" : "Tier 2",
"price" : 1500,
"base_fee" : 200,
"perc_fee" : "0.12",
"access_code" : "f50f262cd0bf1ec4ab36667c2a762446",
"redeemed" : true
}
]
}
We can do aggregations like following
$unwind to deconstruct the array
$group to reconstruct the array. While regrouping by eventId and issued_tickets.name, we can count using $sum
Mongo script :
// Count issued tickets per event and ticket name.
db.collection.aggregate([
    // Emit one document per element of the issued_tickets array.
    { "$unwind": "$issued_tickets" },
    // Group on the (event, ticket name) pair and count the members.
    { "$group": {
        "_id": { "_id": "$event_id", "ticketName": "$issued_tickets.name" },
        "count": { "$sum": 1 }
    } },
    // Flatten the compound group key into top-level fields and hide _id.
    { "$project": {
        "event_id": "$_id._id",
        "ticketName": "$_id.ticketName",
        "count": 1,
        "_id": 0
    } }
])
Working Mongo playground

How to spot an outlier in MongoDB

Assume the following records in mongodb
{
_id: // primary key
age: // some age.
}
The system generates primary key and is guaranteed to be increasing monotonically.
The business logic provides value for age. Age should be increasing, however due to a bug, under some remote cases, the age could be decreasing.
Eg: age could go from 1 yr, 2 yr, 3yr, "2 yr", 4yr, 5yr etc.
How to write a query to spot the outlier in the age ?
Assuming your collection is called 'junk' (sorry, no bad intentions here) I think this might work...
// Self-join 'junk' to find documents that are followed by a document with a lower age.
db.junk.aggregate([
{$lookup: {
from: "junk",
// Expose the outer document's age and _id to the sub-pipeline as $$age / $$id.
let: { age: "$age", id: "$_id" },
pipeline: [
{ $match :
{ $expr:
{ $and:
[
// The joined document has a greater _id than the outer document ...
{$gt: ["$_id", "$$id"]},
// ... yet a smaller age.
{ $lt: ["$age", "$$age"] }
]
}
}
}
],
as: "data"
}},
// "found" is true when at least one such document was joined into "data".
{ $project: { _id: 1, "age": 1, "data": 1, "found": { $gt: [{ $size: "$data" }, 0] } } },
// Keep only the documents for which a later, lower age exists.
{ $match : { found: true }}
])
The intent is to self join on the same collection where the id is greater than another document, but the age is less for the same document. Count how many records are in this collection, and if the count is greater than 0 output.
Example Collections:
So, for testing this I populated a collection called 'junk' with 7 documents...
> db.junk.find()
{ "_id" : ObjectId("5daf4700090553aca6da1535"), "age" : 0 }
{ "_id" : ObjectId("5daf4700090553aca6da1536"), "age" : 1 }
{ "_id" : ObjectId("5daf4700090553aca6da1537"), "age" : 2 }
{ "_id" : ObjectId("5daf471b090553aca6da1538"), "age" : 3 }
{ "_id" : ObjectId("5daf471e090553aca6da1539"), "age" : 4 }
{ "_id" : ObjectId("5daf4721090553aca6da153a"), "age" : 3 }
{ "_id" : ObjectId("5daf4724090553aca6da153b"), "age" : 5 }
Results:
Here is what my results look like after running this query...
{ "_id" : ObjectId("5daf471e090553aca6da1539"), "age" : 4, "data" : [ { "_id" : ObjectId("5daf4721090553aca6da153a"), "age" : 3 } ], "found" : true }
It found a record having a later outlier (ObjectId 5daf471e090553aca6da1539 precedes the outlier, ObjectId 5daf4721090553aca6da153a is the outlier). Obviously this could be projected differently to show just the outlier, but I wanted to first verify the query works as expected and not invest more time on an inadequate approach.

Building a pipeline and aggregate in Mongo

How do I aggregate the below collection of document type to sum the quantity of all product_id sold based on each district_id and city_id within a period of time
I tried using the aggregate functions of $match, $group but haven't been successful.
{
"_id" : ObjectId("5b115e00a186ae19062b0714"),
"id" : 86164014,
"cost" : 3,
"created_date" : "2017-04-04 21:44:14",
"quantity" : 12,
"bill_id" : 46736603,
"product_id" : 24,
"bill_date" : "2017-04-04",
"district_id" : 75,
"city_id": 21
}
You should be more specific about the "within a period of time" and which field we should consider, but the query for the first part could be this one:
// Total quantity per (city_id, district_id) pair.
db.getCollection("your collection").aggregate([
    { "$group": {
        // Compound key: one group per distinct city/district combination.
        "_id": { "city_id": "$city_id", "district_id": "$district_id" },
        "quantities": { "$sum": "$quantity" }
    } }
])

mongo db how to write a function in an query maybe aggregation?

The question is Calculate the average age of the users who have more than 3 strengths listed.
One of the data is like this :
{
"_id" : 1.0,
"user_id" : "jshaw0",
"first_name" : "Judy",
"last_name" : "Shaw",
"email" : "jshaw0#merriam-webster.com",
"age" : 39.0,
"status" : "disabled",
"join_date" : "2016-09-05",
"last_login_date" : "2016-09-30 23:59:36 -0400",
"address" : {
"city" : "Deskle",
"province" : "PEI"
},
"strengths" : [
"star schema",
"dw planning",
"sql",
"mongo queries"
],
"courses" : [
{
"code" : "CSIS2300",
"total_questions" : 118.0,
"correct_answers" : 107.0,
"incorect_answers" : 11.0
},
{
"code" : "CSIS3300",
"total_questions" : 101.0,
"correct_answers" : 34.0,
"incorect_answers" : 67.0
}
]
}
I know I need to count how many strengths this data has, and then set it to $gt, and then calculate the average age.
However, I don't know how to combine two functions, count and average, in one query. Do I need to use aggregation? If so, how?
Thanks so much
Use $redact to match your array size & $group to calculate the average :
// Average age of the users that list more than 3 strengths.
db.collection.aggregate([
    {
        "$redact": {
            "$cond": [
                // $size raises an error when its argument is not an array, so a
                // missing or null "strengths" field is treated as an empty list
                // (such users are pruned instead of aborting the whole pipeline).
                { "$gt": [{ "$size": { "$ifNull": ["$strengths", []] } }, 3] },
                "$$KEEP",
                "$$PRUNE"
            ]
        }
    },
    {
        // Constant _id: all kept users fall into one group.
        "$group": {
            "_id": 1,
            "average": { "$avg": "$age" }
        }
    }
])
The $redact stage checks whether the size of the strengths array is greater than 3: it will $$KEEP records that match this condition and $$PRUNE the records that don't. Check the $redact documentation.
The $group stage then just computes the average with $avg.

mongodb - filter out some values when doing $unwind

I have the following Customer Order data in mongodb
"_id" : 7,
"customer name" : "John Smith",
"OrderItem" : [
{
"product_category" : "Mobile",
"price" : 900
},
{
"product_category" : "Computer",
"price" : 4200.48
},
{
"product_category" : "TV",
"price" : 670.20
},
{
"product_category" : "TV",
"price" : 960.52
}
]
I need to average each product category to be like this:
"_id" : 7,
"customer name" : "John Smith",
"OrderItem" : [
{
"product_category" : "Mobile",
"price" : 900
},
{
"product_category" : "Computer",
"price" : 4200.48
},
{
"product_category" : "TV",
"price" : 815.36
}
]
I tried to use $unwind but I am not sure how to group them. Any help?
Use aggregation framework with a pipeline which consists of the following stages: a $match operation in the first pipeline stage filters the document stream to allow only matching documents (document with _id = 7 in your case) to pass unmodified into the next pipeline stage, which is the $unwind operation. This deconstructs the desired OrderItem array field from the input documents to output a document for each element that you can then group on and do the aggregation operation of finding the average of the category prices. The next stage in the pipeline is the $group operation which then groups input documents by product_category and applies the $avg expression to each group on the price. The last stage $project then reshapes each document in the stream to produce the desired outcome. Thus your aggregation would look like:
// Average price per product category for the order with _id 7.
db.collection.aggregate([
    // Restrict the stream to the single order of interest.
    { "$match": { "_id": 7 } },
    // Emit one document per element of the OrderItem array.
    { "$unwind": "$OrderItem" },
    // Average the item prices within each product category.
    { "$group": {
        "_id": "$OrderItem.product_category",
        "average_price": { "$avg": "$OrderItem.price" }
    } },
    // Expose the group key as product_category and hide _id.
    { "$project": {
        "_id": 0,
        "product_category": "$_id",
        "average_price": 1
    } }
])
Result:
{
"result" : [
{
"average_price" : 4200.48,
"product_category" : "Computer"
},
{
"average_price" : 815.36,
"product_category" : "TV"
},
{
"average_price" : 900,
"product_category" : "Mobile"
}
],
"ok" : 1
}
First you should $unwind OrderItem, then $group the items and use $avg to calculate the average. The aggregation below calculates the average per category:
// Average OrderItem price per product category for one customer.
db.collectionName.aggregate(
{"$match":{"customer name":"John Smith"}}, // keep only the specified customer's documents
{"$unwind":"$OrderItem"}, // emit one document per OrderItem element
{"$group":{"_id":"$OrderItem.product_category",
"avg": {"$avg":"$OrderItem.price"} // average price within each product category
}}
).pretty()
The above query returns the following results:
{ "_id" : "Computer", "avg" : 4200.48 }
{ "_id" : "TV", "avg" : 815.36 }
{ "_id" : "Mobile", "avg" : 900 }
But the above result does not match your expected output, so you should group twice to get the exact output:
// Rebuild the customer's document with one averaged entry per product category.
db.collectionName.aggregate(
    // Keep only the requested customer.
    { "$match": { "customer name": "John Smith" } },
    // Emit one document per OrderItem element.
    { "$unwind": "$OrderItem" },
    // Average the price per category, carrying the customer's name and the
    // original _id along so the document can be reassembled afterwards.
    { "$group": {
        "_id": "$OrderItem.product_category",
        "customerName": { "$first": "$customer name" },
        "id": { "$first": "$_id" },
        "avg": { "$avg": "$OrderItem.price" }
    } },
    // Regroup by the original _id and push the averaged categories back into
    // an array. The output key is "customer name" (lower-case "n") so that it
    // matches the field name of the input document and the expected output.
    { "$group": {
        "_id": "$id",
        "customer name": { "$first": "$customerName" },
        "OrderItem": { "$push": { "product_category": "$_id", "price": "$avg" } }
    } }
).pretty()
// Replace each document's OrderItem array with one averaged entry per
// product category. The call needs a collection receiver; "db.collection"
// is a placeholder for the actual collection name.
db.collection.aggregate([
    // Emit one document per OrderItem element.
    { $unwind: "$OrderItem" },
    // Group per (original document, category) so that averages are never
    // mixed across different documents.
    { $group: {
        _id: { id: "$_id", cat: "$OrderItem.product_category" },
        name: { $first: "$customer name" },
        price: { $avg: "$OrderItem.price" }
    } },
    // Reassemble one output document per original _id.
    { $group: {
        _id: "$_id.id",
        OrderItem: { $push: { product_category: "$_id.cat", price: "$price" } },
        "customer name": { $first: "$name" }
    } }
])