MongoDB group by subdocument counts and year - mongodb

Here is a sample of my document from collection called products:
{
"_id" : "B000KIT6LQ",
"brand" : "unknown",
"category" : "Electronics",
"price" : "11.99",
"title" : "Scosche KA2067B 2005...",
"reviews" : [
{
"date" : ISODate("1969-12-31T23:59:59Z"),
"score" : 5,
"user_id" : "AK7M5Y7E9O3L7",
"sentiment" : 0.5,
"text" : "Bought this so I ...",
"user_gender" : "female",
"voted_total" : 0,
"voted_helpful" : 0,
"user_name" : "Alex",
"summary" : "It is what it is"
},
{
"date" : ISODate("1969-12-31T23:59:59Z"),
"score" : 5,
"user_id" : "A26VRLMPEA8IDR",
"sentiment" : 0.352,
"text" : "Years ago I worked as an...",
"user_gender" : "male",
"voted_total" : 0,
"voted_helpful" : 0,
"user_name" : "Jack R. Smith",
"summary" : "Great Kit"
},
{
"date" : ISODate("1969-12-31T23:59:59Z"),
"score" : 4,
"user_id" : "A1TGBDVX3QXCRH",
"sentiment" : 0.19318181818181818,
"text" : "This insert works great in my ...",
"user_gender" : "female",
"voted_total" : 0,
"voted_helpful" : 0,
"user_name" : "J. Reed",
"summary" : "Fits great in my 2006 Spectra5"
}
]
}
I have many documents with multiple categories. I am trying to create a mongo query which will result in all categories with the number of reviews (subdocument) per year. I have to group by categories and year, and get the count for number of reviews.
This is the query that I have got so far:
db.products.aggregate([
{ $unwind : "$reviews" },
{ $group: {
_id: {category: "$category", date: "$reviews.date.getFullYear()"},
count: { $sum: 1 }}},
{$sort:{"count": -1}}
])
For some reason the getFullYear() method is not working for me. If I group by $reviews.date I get the results.
Any pointers on getting the query right are appreciated.
Thanks

You can't use JavaScript functions like getFullYear() in your aggregate pipeline; you need to use the equivalent aggregation date operator, which in this case is $year.
db.products.aggregate([
{ $unwind : "$reviews" },
{ $group: {
_id: {category: "$category", date: {$year: "$reviews.date"}},
count: { $sum: 1 }}},
{$sort:{"count": -1}}
])

Related

How to group multiple record using aggregate in mongodb

I want to group on the basis of the element key that can be similar in many records.
Business_portfolio_attributes details
[{
"_id" : ObjectId("613f2428d62bbe20db999133"),
"status" : 1,
"element" : "Current Assets",
"instituteBenchMarkId" : ObjectId("613b11006845993d61346bf9"),
"attributeValue" : 352907.04,
"startDate" : ISODate("2021-07-01T00:00:00.000Z"),
"reportId" : ObjectId("613f2427d62bbe20db999130"),
"businessId" : ObjectId("61370a5ac3941822005f91de"),
"category" : "BS",
"__v" : 0,
"createdAt" : ISODate("2021-09-13T10:12:56.334Z"),
"updatedAt" : ISODate("2021-09-13T10:12:56.334Z")
}]
The query I have used is mentioned below. Any alternative for this query?
db.getCollection('business_portfolio_attributes').aggregate([
{$match: { category: "BS"}},
{$group: { _id: "$instituteBenchMarkId", elements:{ $push: {
"element": "$element",
"startDate": "$startDate"
} }} }
])

MongoDB, Grouping by Multiple Fields

I'm pretty new to Mongo and am finding some simple things that I would do in SQL frustratingly difficult in Mongo.
I have an object similar to this below
[{
"_id" : ObjectId("5870fb29a1fe030e1a2909db"),
"updatedAt" : ISODate("2017-01-07T14:28:57.224Z"),
"createdAt" : ISODate("2017-01-07T14:28:57.224Z"),
"state" : "Available",
},
{
"_id" : ObjectId("5870fb29a1fe030e1a2909dc"),
"updatedAt" : ISODate("2017-01-07T14:28:57.224Z"),
"createdAt" : ISODate("2017-01-07T14:28:57.224Z"),
"state" : "notReady",
},
{
"_id" : ObjectId("5870fb29a1fe030e1a2909d9"),
"updatedAt" : ISODate("2017-01-07T14:28:57.224Z"),
"createdAt" : ISODate("2017-01-07T14:28:57.224Z"),
"state" : "Disconnected",
}]
What I'm looking to do is group the data by the maximum date and the state.
Ideally, the result I am looking for would be something like the following.
{
latestDate: "2017-01-07T14:28:57",
states : {
available : 10,
disconnected : 5,
notReady : 2
}}
Basically I'm looking for the SQL equivalent of this:
SELECT createdAt, state, COUNT(rowid)
FROM db
WHERE date = (SELECT MAX(createdAt) FROM db)
GROUP BY 1,2
I've searched around here and have found some good info, but I am probably missing something straightforward. I've only managed to get this far:
db.collection.aggregate([
{$project: {"_id" : 0,"state": 1, "date" : "$createdAt"}},
{$group : {"_id" : {"date":"$date", "state": "actual"}, "count":{"$sum":1}}}
])
Any help would be appreciated :)
db.collection.aggregate([
{
$group : {
_id : {
date : "$createdAt",
state : "$state"
},
count : {$sum : 1}
}
},
{
$group : {
_id : "$_id.date",
states : {
$addToSet : {
state : "$_id.state",
count : "$count"
}
}
}
},
{
$sort : {_id : -1}
},
{
$limit : 1
},
{
$project : {
_id : 0,
latestDate : "$_id",
states : "$states"
}
}
])
output :
{
"latestDate" : ISODate("2017-01-07T14:28:57.224Z"),
"states" : [
{
"state" : "Available",
"count" : 1
},
{
"state" : "notReady",
"count" : 1
},
{
"state" : "Disconnected",
"count" : 1
}
]
}

MongoDB: $mod operator in aggregation pipeline

I have a restaurants collection that contains 3772 documents and I am trying to calculate the total number of documents that contain a score in the first element of the grades array that's a multiple of 7 using the aggregation framework.
Query:
db.restaurants.aggregate([
{$project: {remainder: {$mod: ["$grades.0.score", 7]},
restaurant_id: 1,
name: 1,
grades: 1
}
},
{$match: {remainder: {$eq: 0}}},
{$group: {_id: null, total: {$sum: 1}}}
])
However, I am getting an error message that's caused by the use of the $mod operator in the $project pipeline stage. The error message is the following:
$mod only supports numeric types, not Array and NumberDouble
However, both $grades.0.score and 7 are integers, right? What should I change to make this query work as intended?
Example document:
{
"_id" : ObjectId("57290430139a4a37132c9e93"),
"address" : {
"building" : "469",
"coord" : [
-73.961704,
40.662942
],
"street" : "Flatbush Avenue",
"zipcode" : "11225"
},
"borough" : "Brooklyn",
"cuisine" : "Hamburgers",
"grades" : [
{
"date" : ISODate("2014-12-30T00:00:00Z"),
"grade" : "A",
"score" : 8
},
{
"date" : ISODate("2014-07-01T00:00:00Z"),
"grade" : "B",
"score" : 23
},
{
"date" : ISODate("2013-04-30T00:00:00Z"),
"grade" : "A",
"score" : 12
},
],
"name" : "Wendy'S",
"restaurant_id" : "30112340"
}
instead of $grades.0.score
put $grades[0].score
in your query.
The above is wrong; see the correct form below. Since you want to keep only the documents whose first grade has a score that is a multiple of 7, your aggregation should start like this.
db.restaurants.aggregate([{$match: {"grades.0.score": {$mod: [7, 0]}}},{$group: {_id: null, total: {$sum: 1}}}])
I changed grades.0.score to 7 in the sample document and ran the command to check whether it works; it appears to work as you wanted.
> db.restaurants.find().pretty();
{
"_id" : 0,
"address" : {
"building" : "469",
"coord" : [
-73.961704,
40.662942
],
"street" : "Flatbush Avenue",
"zipcode" : "11225"
},
"borough" : "Brooklyn",
"cuisine" : "Hamburgers",
"grades" : [
{
"date" : ISODate("2014-12-30T00:00:00Z"),
"grade" : "A",
"score" : 7
},
{
"date" : ISODate("2014-07-01T00:00:00Z"),
"grade" : "B",
"score" : 23
},
{
"date" : ISODate("2013-04-30T00:00:00Z"),
"grade" : "A",
"score" : 12
}
],
"name" : "Wendy'S",
"restaurant_id" : "30112340"
> db.restaurants.aggregate([{$match: {"grades.0.score": {$mod: [7, 0]}}},{$group:{_id:null,count:{$sum:1}}} ])
{ "_id" : null, "count" : 1 }
First: why doesn't it work? Try:
db.restaurants.aggregate([
{$project: {
score0: "$grades.0.score",
restaurant_id: 1,
name: 1
}
}
])
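You'll see that score0 returns [0 elements]: inside an aggregation expression, the path "$grades.0.score" is not resolved as an array index the way it is in a query, so it outputs an (empty) array rather than a number, hence the error message.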
Based on this other question Get first element in array and return using Aggregate? (Mongodb), here is a solution to your problem:
db.restaurants.aggregate([
{$unwind: "$grades"},
{$group:{"_id":"$_id","grade0":{$first:"$grades"}}},
{$project: {
remainder: {$mod: ["$grade0.score", 7]},
restaurant_id: 1,
name: 1,
grade0: 1,
}
},
{$match: {remainder: {$eq: 0}}},
{$group: {_id: null, total: {$sum: 1}}}
])

Mongodb aggregation: how to use unwind->group->project multiple times

I have an orders collection where I need to calculate some sums from multiple sub-arrays, but I can't figure out how to lose the multiplied items that the double unwind creates.
db.Orders.aggregate(
{$unwind: "$items"},
{$unwind: "$shipping"},
{$group: {
_id: {
year: { '$year': '$createdAt' },
month: { '$month': '$createdAt' },
day: { '$dayOfMonth': '$createdAt' }
},
mainItems: { $addToSet: '$items' },
totalSales: {$sum: {
$multiply: ["$items.quantity", "$items.variants.price"]
}},
averageSales: {$avg: {$multiply: ["$items.quantity", "$items.variants.price"]}},
/* this will not sum the individial orders because the unwind
* created multiple document per order*/
ordersPlaced: {$sum: 1},
itemsPurchased: {$sum: "$items.quantity"},
totalRefundAmount: {$sum: 0},
chargedForShipping: {$sum: "$shipping.shipmentMethod.rate"}
}}
)
If I take shipping out of the unwind and the group, the query returns the correct values except for chargedForShipping (0, since shipping is no longer unwound) and ordersPlaced, which is still higher than expected (but I also need the shipping information, and even more additional fields that I took out for easier understanding).
Sample data:
[{
"_id" : "xK29ZHxGcYvgWgx5p",
"sessionId" : "yw7e9G7uBzYTy9Grq",
"userId" : "fZREMm2DmsnMosMKj",
"shopId" : "oiqQDnuBwabj44q2o",
"billing" : [
{
"shopId" : "oiqQDnuBwabj44q2o",
"_id" : "9TMJj9w65MmAkgg27",
"paymentMethod" : {
"amount" : 22.45,
"status" : "settled",
"mode" : "capture",
"transactionId" : "AP",
"createdAt" : ISODate("2016-02-15T13:44:35.116Z"),
"transactions" : [
{ type:"refund", amount:5}
]
}
},
{
"shopId" : "9YfkXWyCci8fN43Pj",
"_id" : "RwW8xMnFzQqdTpqtg",
"paymentMethod" : {
"createdAt" : ISODate("2016-02-15T13:44:35.116Z")
}
},
{
"shopId" : "SgXWPKGJkxBw6qsbT",
"_id" : "ASizt6BtkxpCxgEJn",
"paymentMethod" : {
"createdAt" : ISODate("2016-02-15T13:44:35.116Z")
}
}
],
"shipping" : [
{
"_id" : "yXb5T5zLuxPYmgoT5",
"shipmentMethod" : {
"name" : "Continental US",
"_id" : "womiJX2QZBFQQWFur",
"rate" : 9.949999999999999,
"shopId" : "9YfkXWyCci8fN43Pj",
},
"items" : [
{
"_id" : "48s9bmDfrRMqnkije",
"productId" : "KXtF5xqERWJsXk2yP",
"shopId" : "SgXWPKGJkxBw6qsbT",
"variantId" : "YQDHuyPHbhx4wruZx",
"quantity" : 1
}
],
"packed" : false,
"shipped" : false,
}
],
"items" : [
{
"_id" : "hhuiGFTBkLACLpPjQ",
"shopId" : "9YfkXWyCci8fN43Pj",
"productId" : "sDYNXMrnRJiyQ8gex",
"quantity" : 1,
"variants" : {
"_id" : "muJi6Bqnq2CD8B7AR",
"price" : 2.5,
"title" : "egy",
"weight" : 23,
},
"type" : "simple",
},
{
"_id" : "48s9bmDfrRMqnkije",
"shopId" : "SgXWPKGJkxBw6qsbT",
"productId" : "KXtF5xqERWJsXk2yP",
"quantity" : 1,
"variants" : {
"_id" : "YQDHuyPHbhx4wruZx",
"title" : "Bogi varinat title",
"price" : 10,
"type" : "variant",
"compareAtPrice" : 100000,
"weight" : 100,
},
"type" : "simple",
}
],
"email" : "test#user.com",
"createdAt" : ISODate("2016-02-15T13:44:35.091Z"),
"updatedAt" : ISODate("2016-02-15T14:24:55.174Z")
}]
What I need is orderTotal per month, shippingTotal per month, totalRefunded per month, and average sales per month. The issue is that one value comes from the items sub-array, another from the shipping sub-array, and the third from the billing sub-array, which is why I have issues with the unwind.

How to ensure grouping via two separate criteria

Expanded from How to average the summed up values in mongodb?
Using MongoDB 2.4.8,
I have the following records
{
"category" : "TOYS",
"price" : 12,
"status" : "online",
"_id" : "35043"
}
{
"category" : "TOYS",
"price" : 13,
"status" : "offline",
"_id" : "35044"
}
{
"category" : "TOYS",
"price" : 22,
"status" : "online",
"_id" : "35045"
}
{
"category" : "BOOKS",
"price" : 13,
"status" : "offline",
"_id" : "35046"
}
{
"category" : "BOOKS",
"price" : 17,
"status" : "online",
"_id" : "35047"
}
{
"category" : "TOYS",
"price" : 19,
"status" : "unavailable",
"_id" : "35048"
}
{
"category" : "BOOKS",
"price" : 10,
"status" : "unavailable",
"_id" : "35049"
}
{
"category" : "BOOKS",
"price" : 17,
"status" : "unavailable",
"_id" : "35050"
}
I want to find the average price of all categories whose status is online OR offline and total price within a category is more than 50.
Toys offline and Toys online are considered two separate categories.
I adapted the answer given.
db.items.aggregate([
{$match:
{
$or: [
{status:"online"},
{status:"offline"}
]
}
},
{$group :
{
_id: "$category",
total_price: {$sum:"$price"},
}
},
{$match:
{
total_price:{$gt:50}
}
},
{$group :
{
_id: "1",
avg_price: {$avg:"$total_price"},
}
},
]);
But I believe this adapted query groups categories of the same name together, which is not what I am looking for.
If online and offline are the only values for status, you can remove the initial $match step. If it is needed, it would be more appropriate to use the $in operator as these values could be found in the same index (if one existed).
I think the only step you are missing is that you can $group by multiple fields (i.e. category and status):
db.items.aggregate(
// If 'online' and 'offline' are the only possible status values, this may be unnecessary
{ $match: {
'status' : { $in: [ 'online', 'offline' ] }
}},
// Group by category & status
{ $group: {
_id: { category: "$category", status: "$status" },
total_price: { $sum: "$price" },
}},
// Only find groups where total_price is > 50
{ $match: {
total_price: { $gt:50 }
}},
// Find the average price for the group
{ $group : {
_id: null,
avg_price: {$avg:"$total_price"},
}}
)