MongoDB - Help needed to make some aggregation - mongodb

I am having a bad time trying to do an aggregation in MongoDB.
I need to combine information from each user's documents. The final result should be a list of users (with exactly one object per user), where each object contains several lists of distinct information.
1 - The createdAtList array must be ordered from the oldest to the newest date. sumOfTotal is a running total: the total at the current position added to the previous entry's sumOfTotal (as shown in the example below), not just the sum of all the totals.
2 - The categotyList must be ordered like: category1, category2, category3 ...
3 - The desired final result must be ordered like: user1, user2, user3 ...
Basically I need some help to do the following:
//List of docs from my collection:
[
{
_id: "doc1",
user: "user1",
category: "category1",
createdAt: "2018-01-01T00:00:00.000Z"
},
{
_id: "doc2",
user: "user1",
category: "category2",
createdAt: "2017-12-12T00:00:00.000Z",
},
{
_id: "doc3",
user: "user1",
category: "category1",
createdAt: "2017-12-12T00:00:00.000Z",
},
{
_id: "doc4",
user: "user1",
category: "category2",
createdAt: "2018-01-01T00:00:00.000Z"
},
{
_id: "doc5",
user: "user1",
category: "category3",
createdAt: "2017-11-11T00:00:00.000Z"
}
]
//Desired result:
{
user: "user1",
createdAtList: [ //list ordered by createdAt
{
createdAt: "2017-11-11T00:00:00.000Z",
total: 1,
sumOfTotal: 1 //first entry, so it equals its own total
}
{
createdAt: "2017-12-12T00:00:00.000Z",
total: 2,
sumOfTotal: 3 //summed up with the previous
}
{
createdAt: "2018-01-01T00:00:00.000Z",
total: 2,
sumOfTotal: 5 //summed up with the previous
}
],
categotyList: [ //list ordered by category
{
category: "category1",
total: 2
},
{
category: "category2",
total: 2
},
{
category: "category3",
total: 1
}
]
},
...
Is it possible to do this in a single aggregation?

I do not think it really makes sense to have the createdAtList.sumOfTotal field. I do not think the fields in an array should be dependent upon a particular order of the elements. If you want some field to contain the sum of the createdAtList.total field, I think there should only be one field (outside of the array). That being said, here is the query I came up with to give you the desired results (using "users" as the name of the collection):
// Builds one document per user with a createdAtList sorted by date, a
// categoryList sorted by category, and sumOfTotal (the sum of every
// createdAtList.total for that user).
db.users.aggregate([
// Count the documents per (user, createdAt) pair and collect their categories.
{
$group: {
_id: {
user: "$user",
createdAt: "$createdAt"
},
total: { $sum: 1 },
category: { $push: "$category" }
}
},
// Flatten the compound _id into top-level user / createdAt fields.
{
$project: {
_id: 0,
user: "$_id.user",
createdAt: "$_id.createdAt",
total: "$total",
category: 1
}
},
// One document per collected category entry.
{ $unwind: "$category" },
// Count the entries per (user, category) pair and keep the { createdAt, total }
// pairs they came from.
{
$group: {
_id: {
user: "$user",
category: "$category"
},
catTotal: { $sum: 1 },
createdAtList: {
$push: {
createdAt: "$createdAt",
total: "$total"
}
}
}
},
{
$project: {
_id: 0,
user: "$_id.user",
createdAtList: 1,
category: "$_id.category",
catTotal: 1
}
},
{ $unwind: "$createdAtList" },
// Collapse to one document per user; $addToSet drops the duplicate
// { createdAt, total } and { category, total } entries.
{
$group: {
_id: "$user",
createdAtList: {
$addToSet: "$createdAtList"
},
categoryList: {
$addToSet: {
category: "$category",
total: "$catTotal"
}
}
}
},
// Rebuild createdAtList in ascending createdAt order: unwind, sort, push back.
{ $unwind: "$createdAtList" },
{ $sort: { "createdAtList.createdAt": 1 } },
{
$group: {
_id: "$_id",
createdAtList: {
$push: "$createdAtList"
},
categoryList: {
$first: "$categoryList"
}
}
},
// Same unwind / sort / push round trip for categoryList, ordered by category.
{ $unwind: "$categoryList" },
{ $sort: { "categoryList.category": 1 } },
{
$group: {
_id: "$_id",
createdAtList: {
$first: "$createdAtList"
},
categoryList: {
$push: "$categoryList"
}
}
},
// Final shape. sumOfTotal is a single per-user value (sum of all totals),
// not a running total stored on each createdAtList entry.
{
$project: {
_id: 0,
user: "$_id",
createdAtList: 1,
sumOfTotal: { $sum: "$createdAtList.total" },
categoryList: 1
}
},
// Order the resulting user documents by user.
{ $sort: { user: 1 } },
]).pretty()

Related

MongoDB add grand total to sortByCount() in an aggregation pipeline

I have grouped all the users by country, but I would also like to have a row showing the grand total (users are tagged to a single country in our use case).
Data Model / Sample Input
The collection is filled with objects representing a country (name) and each contains a list of user objects in an array under users.
{ _id: ObjectId("..."),
name: 'SG',
type: 'COUNTRY',
increment: 200,
users:
[ ObjectId("..."),
ObjectId("..."),
...
Query
// Keep the COUNTRY documents, emit one document per entry of users, then count the documents per name.
db.collection.aggregate([{$match:{type:"COUNTRY"}},{$unwind:"$users"},{$sortByCount:"$name"}])
Current Results
{ _id: 'SG', count: 76 }
{ _id: 'IN', count: 6 }
{ _id: 'US', count: 4 }
{ _id: 'FR', count: 3 }
{ _id: 'UK', count: 2 }
{ _id: 'RU', count: 1 }
{ _id: 'CO', count: 1 }
{ _id: 'DK', count: 1 }
{ _id: 'ID', count: 1 }
{ _id: 'PH', count: 1 }
Expected Results
{ _id: 'SG', count: 76 }
{ _id: 'IN', count: 6 }
{ _id: 'US', count: 4 }
{ _id: 'FR', count: 3 }
{ _id: 'UK', count: 2 }
{ _id: 'RU', count: 1 }
{ _id: 'CO', count: 1 }
{ _id: 'DK', count: 1 }
{ _id: 'ID', count: 1 }
{ _id: 'PH', count: 1 }
{ _id: null, count: 96 } <<< TOTAL COUNT ADDED
Any tips to achieve this without resorting to complex or dirty tricks?
You can also try using $facet to calculate counts by country name and total count, and then combine them together. Something like this:
// Counts users per country and appends a grand-total document in one pipeline.
db.collection.aggregate([
{
$match: {
type: "COUNTRY"
}
},
// One document per entry of the users array.
{
"$unwind": "$users"
},
// Run two sub-pipelines over the same unwound documents.
{
"$facet": {
// Per-country counts.
"groupCountByCountry": [
{
"$sortByCount": "$name"
}
],
// A single document { _id: null, count: <number of unwound documents> }.
"totalCount": [
{
"$group": {
"_id": null,
"count": {
"$sum": 1
}
}
}
]
}
},
// Put the total document after the per-country documents in one array.
{
"$project": {
array: {
"$concatArrays": [
"$groupCountByCountry",
"$totalCount"
]
}
}
},
// Turn each array element back into its own top-level document.
{
"$unwind": "$array"
},
{
"$replaceRoot": {
"newRoot": "$$ROOT.array"
}
}
])
Here's the playground link.
I recommend just doing this in memory, as the alternative is "hacky". But to achieve it in Mongo, you just need to group all the documents, add a new document, and unwind again, like so:
// Appends a { _id: null, count: <sum> } document to the incoming documents.
// NOTE(review): this sums an existing `count` field, so the input presumably is
// the output of the $sortByCount query (i.e. these stages go after it) — confirm.
db.collection.aggregate([
// Gather every incoming document into one array and add up their counts.
{
$group: {
_id: null,
roots: {
$push: "$$ROOT"
},
sum: {
$sum: "$count"
}
}
},
// Append the grand-total document to the end of the gathered array.
{
$addFields: {
roots: {
"$concatArrays": [
"$roots",
[
{
_id: null,
count: "$sum"
}
]
]
}
}
},
// Emit each array element as its own top-level document again.
{
$unwind: "$roots"
},
{
$replaceRoot: {
newRoot: "$roots"
}
}
])
Mongo Playground

MongoDB DeleteMany except latest document group by User

Below is an example of the documents I have in a collection. I want to delete all the duplicate documents except for the one that has the latest revision number, filtered by user and grouped by guid.
[
{
_id: ObjectId("5e8e2d28ca6e660006f263e6"),
guid: 1,
revision: 1,
user:1
},
{
_id: ObjectId("5e8e2d28ca6e660006f263e7"),
guid: 1,
revision: 2,
user:1
},
{
_id: ObjectId("5e8e2d28ca6e660006f264d0"),
guid: 2,
revision: 5,
user:1
},
{
_id: ObjectId("5e8e2d28ca6e660006f264d1"),
guid: 1,
revision: 5,
user:1
},
{
_id: ObjectId("5e8e2d28ca6e660006f264d2"),
guid: 3,
revision: 5,
user:2
}
]
The expected result is that the following documents are deleted:
For user : 1
-- _id: ObjectId("5e8e2d28ca6e660006f263e6"),
-- _id: ObjectId("5e8e2d28ca6e660006f263e7"),
-- _id: ObjectId("5e8e2d28ca6e660006f264d0"),
For user : 2
None
I have tried the query below. It works, but it fails when several documents have the same guid and the same revision number. If both are the same, any one of them can be deleted.
// For every document with guid 1 and revision 1, delete the documents that share
// its guid and have a strictly lower revision.
// $lt is a strict comparison, so documents with an equal revision are never deleted.
db.collection.find({guid:1,revision:1}).sort({revision:-1}).forEach(function(doc){
console.log(db.collection.deleteMany({revision:{$lt:doc.revision}, guid:doc.guid}));
})
Please help. Thanks in advance
Try this
Step 1
Get the most recently revised document of each group
// Step 1: pick the single document to keep for every (guid, user) pair.
// The leading $sort puts the highest revision first within each pair, so every
// $first accumulator below reads from the most recently revised document.
// With equal revisions, exactly one of the tied documents is picked.
let data = await db.collection.aggregate([
  { $sort: { user: 1, guid: 1, revision: -1 } },
  {
    $group: {
      _id: { guid: "$guid", user: "$user" },
      id: { $first: "$_id" },
      guid: { $first: "$guid" },
      user: { $first: "$user" },
      // Plain $first is enough after the sort; the former
      // `$first: { $max: "$revision" }` wrapper did nothing for scalar revisions.
      revision: { $first: "$revision" }
      // The former `doc: { $last: "$$ROOT" }` accumulator was dropped: it was
      // never projected and only kept a full extra document per group.
    }
  },
  // Expose the kept document's own _id instead of the compound group key.
  { $project: { _id: "$id", guid: 1, revision: 1, user: 1 } }
])
mongoplayground
Step 2
Get the _ids of the resultant data
x = data.map(item=>item._id);
Step 3
Delete documents other than x
// Remove every document whose _id is not in x (the _ids collected in Step 2).
// Uses the same db.collection handle as Step 1; `db1` was never defined.
await db.collection.deleteMany({ "_id": { $nin: x } });
Option 2: a single query that does the same task by writing the documents to keep into a separate collection
// Option 2: pick the document to keep for every (guid, user) pair and write the
// result into another collection with $out.
// The leading $sort puts the highest revision first within each pair, so every
// $first accumulator below reads from the most recently revised document.
let data = await db.collection.aggregate([
  { $sort: { user: 1, guid: 1, revision: -1 } },
  {
    $group: {
      _id: { guid: "$guid", user: "$user" },
      id: { $first: "$_id" },
      guid: { $first: "$guid" },
      user: { $first: "$user" },
      // Plain $first is enough after the sort; the former
      // `$first: { $max: "$revision" }` wrapper did nothing for scalar revisions.
      revision: { $first: "$revision" }
      // The former `doc: { $last: "$$ROOT" }` accumulator was dropped: it was
      // never projected and only kept a full extra document per group.
    }
  },
  // Expose the kept document's own _id instead of the compound group key.
  { $project: { _id: "$id", guid: 1, revision: 1, user: 1 } },
  { $out: "temp1" } // temp1 is the collection name
])

How to paginate in MongoDB with aggregate

I have a collection of documents, each having a groupID. I am able to retrieve only one document from each group, but I'm not sure how to do the effective pagination.
Example:
{
unitquantity: "2"
itemcode: "842852100008"
name: "Atlas Black"
price: "39990"
size: "s"
groupid: "40bf6073-a1d3-4ffa-9ced-dd2f5fcd1b5e",
},
{
unitquantity: "2"
itemcode: "842852100382"
name: "Atlas Black"
price: "39990"
size: "m"
groupid: "40bf6073-a1d3-4ffa-9ced-dd2f5fcd1b5e",
},
{
unitquantity: "2"
itemcode: "842852100746"
name: "Atlas Black"
price: "39990"
size: "xl"
groupid: "40bf6073-a1d3-4ffa-9ced-dd2f5fcd1b5e",
},
These 3 items have the same groupid but differ in size. I only need to retrieve one item from each group; it doesn't matter which one (in this instance the difference would be the size).
I am able to do that with:
// First page: one representative document per groupid, sorted by groupid
// descending, limited to 30 groups.
var query = {[`category.${type}`]: `${type}`}
db.collection('products').aggregate([
  { $match: query },
  {
    $group: {
      _id: "$groupid",
      images: { $last: "$images" },
      description: { $last: "$description" },
      barcode: { $last: "$barcode" },
      category: { $last: "$category" },
      subcategory: { $last: "$subcategory" },
      id: { $last: "$_id" }
    }
  },
  { $sort: {'_id': -1} },
  { $limit: 30 }
]).toArray().then(result=>{
  // Remember where this page ended so the next request can continue from it.
  lastKey = result[result.length - 1].id
  // $group emits the group id as _id; the result has no groupid field, so
  // reading .groupid here always produced undefined.
  groupId = result[result.length - 1]._id
})
I store the last groupId into the variable. When I press the Load button I want next 30 to appear. Here is my load code which is basically exactly the same except the query for $match uses the document's _id:
// "Load more" query: same grouping as the first page, but restricted to documents
// outside the last shown group whose _id is greater than lastKey.
var query = {[`category.${type}`]: `${type}`, 'groupid': { $ne: `${groupId}`}, '_id': {$gt: lastKey}}
db.collection('products').aggregate([
{ $match: query },
{
$group: {
_id: "$groupid",
groupid: { $last: "$groupid" },
images: { $last: "$images" },
description: { $last: "$description" },
barcode: { $last: "$barcode" },
category: { $last: "$category" },
subcategory: { $last: "$subcategory" },
id: { $last: "$_id" }
}
},
// NOTE(review): the page is ordered by the group key (groupid), while the $match
// above filters on the document _id, so the two orderings are not tied together —
// likely the reason already-shown groups reappear; confirm.
{ $sort: {'_id': -1} },
{ $limit: 30 },
]).toArray().then(result => {
// Remember the last group of this page for the next request.
lastKey = result[result.length - 1].id
groupId = result[result.length - 1]._id
})
However I get many that repeat themselves.
I saw similar issues others had with $group and $sort, where some needed to use $sort both before and after $group, but that doesn't work for me. I don't really know how to sort them correctly based on the document _id.
You can use $skip with the page count. You have to manage the page count state on the client:
// Offset-based paging: `page` is the zero-based page index kept by the client
// (page 0 skips nothing), and each page holds up to 30 groups.
db.collection('products').aggregate([
  { $match: query },
  {
    $group: {
      _id: "$groupid",
      groupid: { $last: "$groupid" },
      images: { $last: "$images" },
      description: { $last: "$description" },
      barcode: { $last: "$barcode" },
      category: { $last: "$category" },
      subcategory: { $last: "$subcategory" },
      id: { $last: "$_id" }
    }
  },
  // Sorting on the unique group key keeps the page boundaries stable between requests.
  { $sort: {'_id': -1} },
  { $skip: 30 * page },
  { $limit: 30 },
]).toArray().then(result => {
  // Paging past the last group yields an empty array; indexing it would throw,
  // so leave the markers untouched in that case.
  if (result.length === 0) return
  const lastGroup = result[result.length - 1]
  lastKey = lastGroup.id
  groupId = lastGroup._id
})

MongoDB multiple nested groups

I have documents in mongodb like this
{
_id: "5cfed55974c7c52ecc33ada8",
name: "Garona",
realm: "Blackrock",
faction: "Horde",
race: "Orc",
class: "Rogue",
guild: "",
level: 33,
lastSeen: "2019-06-10T00:00:00.000Z",
__v: 0
},
{
_id: "5cfed55974c7c52ecc33ade8",
name: "Muradin",
realm: "Alleria",
faction: "Alliance",
race: "Dwarf",
class: "Warrior",
guild: "Stormstout Brewing Co",
level: 42,
lastSeen: "2019-06-11T00:00:00.000Z",
__v: 0
}
What I'm trying to do is group by a field and get a count for each of its values. So far I have figured out how to do it for one field at a time, like so:
// First stage: count documents per class. The constant classes: '1' gives every
// group the same value to regroup on in the next stage.
{
$group: {
_id: {
classes: '1',
class: '$class'
},
total: { $sum: 1 }
}
},
// Second stage: fold all per-class counts into one document holding the overall
// total and the list of { class, total } pairs.
{
$group: {
_id: '$_id.classes',
total: { $sum: '$total' },
classes: {
$push: {
class: '$_id.class',
total: '$total'
}
}
}
}
Which produces something like this
{
_id: "1",
total: 40,
classes: [
{
class: "Warrior",
total: 17
},
{
class: "Rogue",
total: 23
}
}
But I want to do it for more than one field at once, so that I can get an output like this.
{
_id: "1",
total: 40,
classes: [
{
class: "Warrior",
total: 17
},
{
class: "Rogue",
total: 23
},
factions: [
{
faction: "Alliance",
total: 27
},
{
faction: "Horde",
total: 13
}
}
Now I'm wondering whether it is even possible to do this in one query in an easy way, or whether I would be better off doing a separate query for each field.
You can do this by using the $facet aggregation stage
Processes multiple aggregation pipelines within a single stage on the same set of input documents. Each sub-pipeline has its own field in the output document where its results are stored as an array of documents.
I only slightly modified your original pipeline, and then just copied it for the 'factions' field.
The last 3 stages in my solution aren't really necessary, they just clean up the output a little bit.
You can probably take it from here, good luck.
// Computes per-class and per-faction counts (each with its own overall total)
// in one pass by running two sub-pipelines inside $facet.
db.collection.aggregate([
{
"$facet": {
// Sub-pipeline 1: count per class, then fold into { total, classes: [...] }.
"classes": [
{
$group: {
_id: "$class",
total: {
$sum: 1
}
}
},
{
$group: {
_id: null,
total: {
$sum: "$total"
},
"classes": {
$push: {
class: "$_id",
total: "$total"
}
}
}
}
],
// Sub-pipeline 2: the same two stages, keyed on faction.
"factions": [
{
$group: {
_id: "$faction",
total: {
$sum: 1
}
}
},
{
$group: {
_id: null,
total: {
$sum: "$total"
},
"factions": {
$push: {
faction: "$_id",
total: "$total"
}
}
}
}
]
}
},
// Each facet result is an array holding one document; unwind both so the
// fields become plain sub-documents.
{
$unwind: "$classes"
},
{
$unwind: "$factions"
},
// Drop the null _id left over from the inner $group stages.
{
$project: {
"classes._id": 0,
"factions._id": 0
}
}
])
Output
[
{
"classes": {
"classes": [
{
"class": "Warrior",
"total": 1
},
{
"class": "Rogue",
"total": 1
}
],
"total": 2
},
"factions": {
"factions": [
{
"faction": "Alliance",
"total": 1
},
{
"faction": "Horde",
"total": 1
}
],
"total": 2
}
}
]

Select top 3 per field after group MongoDB

I have a collection with fields like "servicerequesttype", "zipcode", "date"
I want to find the 3 most common "servicerequesttype" values per zipcode for a specific day.
// Counts the events of one day per (zipcode, servicerequesttype) pair and lists
// the pairs by zipcode and count, both descending.
db.event.aggregate([
{
$match: {
creationdate: "2011-01-01"
}
},
// Keep only the two fields that are grouped on.
{
$project: {
zipcode: "$zipcode",
servicerequesttype: "$servicerequesttype"
}
},
// One group per (zipcode, servicerequesttype) pair; count is the number of events in it.
{
$group: {
_id: {
zipcode: "$zipcode",
servicerequesttype: "$servicerequesttype"
},
zipcode: {
$first: "$zipcode"
},
servicerequesttype: {
$first: "$servicerequesttype"
},
count: {$sum: 1}
}
},
{
$sort: {
"zipcode": -1,
"count": -1
}
},
// Drop the compound _id from the output.
{
$project: {
_id: 0,
zipcode: "$zipcode",
servicerequesttype: "$servicerequesttype",
count: "$count"
}
}
])
Now all I have to do is select only 3 per zipcode, and I need some help with that; maybe I have to use $bucket or $map...
// Returns, for one day, up to three { type, count } entries per zipcode, taken
// from the front of a list built from documents sorted by count descending.
db.event.aggregate([
  { $match: { creationdate: "2011-01-01" } },
  // Keep only the two fields that are grouped on.
  {
    $project: {
      zipcode: "$zipcode",
      servicerequesttype: "$servicerequesttype"
    }
  },
  // One group per (zipcode, servicerequesttype) pair; count is the number of events in it.
  {
    $group: {
      _id: {
        zipcode: "$zipcode",
        servicerequesttype: "$servicerequesttype"
      },
      zipcode: { $first: "$zipcode" },
      servicerequesttype: { $first: "$servicerequesttype" },
      count: { $sum: 1 }
    }
  },
  // Highest counts first within each zipcode.
  { $sort: { "zipcode": -1, "count": -1 } },
  // Drop the compound _id. The former arrayOfTypes: "$array1" and
  // arrayOfIncidents: "$array2" entries were removed: no earlier stage produces
  // array1 or array2, so they never added a field.
  {
    $project: {
      _id: 0,
      zipcode: "$zipcode",
      servicerequesttype: "$servicerequesttype",
      count: "$count"
    }
  },
  // Collect the { type, count } pairs of each zipcode.
  // NOTE(review): this relies on $push keeping the order produced by the $sort
  // stage above, so the array is ordered by count descending — confirm.
  {
    $group: {
      _id: "$zipcode",
      arrayOfTypes: {
        $push: { type: "$servicerequesttype", count: "$count" }
      }
    }
  },
  // Keep only the first three entries of each zipcode's array.
  {
    $project: {
      _id: "$_id",
      array: { $slice: ["$arrayOfTypes", 3] }
    }
  },
  { $sort: { "_id": -1 } }
])