Aggregate nested arrays - mongodb

I have multiple documents, and I'm trying to aggregate all documents with companyId = xxx and return one array with all the statuses.
So it will look like this:
[
{
"status": "created",
"date": "2019-03-16T10:59:59.200Z"
},
{
"status": "completed",
"date": "2019-03-16T11:00:37.750Z"
},
{
"status": "created",
"date": "2019-03-16T10:59:59.200Z"
},
{
"status": "completed",
"date": "2019-03-16T11:00:37.750Z"
},
{
"status": "created",
"date": "2019-03-16T10:59:59.200Z"
},
{
"status": "completed",
"date": "2019-03-16T11:00:37.750Z"
},
{
"status": "created",
"date": "2019-03-16T10:59:59.200Z"
},
{
"status": "completed",
"date": "2019-03-16T11:00:37.750Z"
}
]
The document look like this:
[
{
"companyId": "xxx",
"position": "",
"section": "",
"comment": "",
"items": [
{
"any": "111",
"name": "some name",
"description": "some description",
"version": "3",
"status": [
{
"status": "created",
"date": "2019-03-16T10:59:59.200Z"
},
{
"status": "completed",
"date": "2019-03-16T11:00:37.750Z"
}
]
},
{
"any": "222",
"name": "some name",
"description": "some description",
"version": "3",
"status": [
{
"status": "created",
"date": "2019-03-16T10:59:59.200Z"
},
{
"status": "completed",
"date": "2019-03-16T11:00:37.750Z"
}
]
}
]
},
{
"companyId": "xxx",
"position": "",
"section": "",
"comment": "",
"items": [
{
"any": "111",
"name": "some name",
"description": "some description",
"version": "3",
"status": [
{
"status": "created",
"date": "2019-03-16T10:59:59.200Z"
},
{
"status": "completed",
"date": "2019-03-16T11:00:37.750Z"
}
]
},
{
"any": "222",
"name": "some name",
"description": "some description",
"version": "3",
"status": [
{
"status": "created",
"date": "2019-03-16T10:59:59.200Z"
},
{
"status": "completed",
"date": "2019-03-16T11:00:37.750Z"
}
]
}
]
}
]
Any suggestion, how to implement this?
Then I want to loop over the array (in code) and count how many items in status created, and completed. maybe it could be done with the query?
Thanks in advance

You can use below aggregation:
db.col.aggregate([
{
$match: { companyId: "xxx" }
},
{
$unwind: "$items"
},
{
$unwind: "$items.status"
},
{
$replaceRoot: {
newRoot: "$items.status"
}
},
{
$group: {
_id: "$status",
count: { $sum: 1 }
}
}
])
Double $unwind will return single status per document and then you can use $replaceRoot to promote each status to root level of your document.
Additionally you can add $group stage to count documents by status.

In addition to the #mickl answer, you can add $project pipeline to get the result as a flat list of status and count.
db.collectionName.aggregate([
{
$match: { companyId: "xxx" }
},
{
$unwind: "$items"
},
{
$unwind: "$items.status"
},
{
$replaceRoot: {
newRoot: "$items.status"
}
},
{
$group: {
_id: "$status",
count: { $sum: 1 }
}
},
{
$project: {
"status":"$_id",
"count":1,
_id:0
}
}
])

If the number of documents on which you are executing the above query is too much then you should avoid using $unwind in the initial stage of aggregation pipeline.
Either you should use $project after $match to reduce the selection of fields or you can use below query:
db.col.aggregate([
{
$match: {
companyId: "xxx"
}
},
{
$project: {
_id: 0,
data: {
$reduce: {
input: "$items.status",
initialValue: [
],
in: {
$concatArrays: [
"$$this",
"$$value"
]
}
}
}
}
},
{
$unwind: "$data"
},
{
$replaceRoot: {
newRoot: "$data"
}
}
])

Related

How to filter array in nested documents in MongoDB?

I'm starting MongoDB and I have difficulties to understand how to filter some nested documents in an array. The objective if to keep only relevant data from a nested array.
Here is the data:
{
"_id": {
"$oid": "47bb"
},
"email": "myemail#gmail.com",
"orders": [
{
"orderNumber": "",
"products": [
{
"brand": "Brand 1",
"processing": {
"status": "pending"
}
}
],
"updated": {
"$date": {
"$numberLong": "1673031718883"
}
}
},
{
"orderNumber": "",
"products": [
{
"brand": "Brand 2",
"processing": {
"status": "pending"
}
}
],
"updated": {
"$date": {
"$numberLong": "1673031718883"
}
}
},
{
"orderNumber": "",
"products": [
{
"brand": "Brand 3",
"processing": {
"status": "processing"
}
}
],
"updated": {
"$date": {
"$numberLong": "1673031718883"
}
}
}
],
"privilege": {
"admin": false
},
"isVerified": {
"email": "true"
}
}
I want exactly the same data structure with 'orders.products.processing.status': 'pending'
The response from the database should be:
{
"_id": {
"$oid": "62b333644f70f94aa47bb4da"
},
"email": "myemail#gmail.com",
"orders": [
{
"orderNumber": "",
"products": [
{
"brand": "Brand 1",
"processing": {
"status": "pending"
}
}
],
"updated": {
"$date": {
"$numberLong": "1673031718883"
}
}
},
{
"orderNumber": "",
"products": [
{
"brand": "Brand 2",
"processing": {
"status": "pending"
}
}
],
"updated": {
"$date": {
"$numberLong": "1673031718883"
}
}
}
],
"privilege": {
"admin": false
},
"isVerified": {
"email": "true"
}
}
My closest attempt to a correct query is:
db.collection.aggregate([{
$unwind: '$orders'
},
{
$unwind: '$orders.products'
},
{
$match: {
"orders.products.processing.status": 'pending'
}
}, {
$group: {
_id: {
"_id": "$_id",
"email": "$email",
"orders": {
"orderNumber": "$orders.orderNumber",
"products": {
"processing": "$orders.products.processing.updated",
"brand": "$orders.products.brand",
}
},
},
products: {
$push: "$orders.products"
},
}
}, {
$project: {
products: 0,
}
}
])
The problem is that the result lose the grouping by _id and loosing the initial json structure. Thanks.
You can try this query:
First $match to get only documents which have orders.products.processing.status as pending (later will be filtered and maybe is redundant but using $map and $filter I prefer to avoid to do over all collection).
Then $project to get only desired values. Here the trick is to return in orders only the orders you want.
To accomplish that you can use $map to iterate over the array and return a new one with values that matches the filter (like a JS map).
And then the $filter. Here are filtered values whose status is not pending and returned to the map that output in the field orders.
And this without $unwind and $group :)
db.collection.aggregate([
{
"$match": {
"orders.products.processing.status": "pending"
}
},
{
"$project": {
"email": 1,
"isVerified": 1,
"privilege": 1,
"orders": {
"$map": {
"input": "$orders",
"as": "order",
"in": {
"orderNumber": "$$order.orderNumber",
"products": {
"$filter": {
"input": "$$order.products",
"cond": {
"$eq": [ "$$this.processing.status", "pending" ]
}
}
}
}
}
}
}
}
])
Example here
And also a bonus... check this example here I've added a one more $filter. It's so messy but if you can understand is quite easy. The $map from the first example return an array, so now I'm using a $filter ussing that array and filtering (not show) the objects where products is empty (i.e. where products.processing.status is not pending).

Adding a nested value as a field - MongDB aggregation

So I have a parent document with users, as well as an array that has users too. I want to add the DisplayName from the nested users array to the aggregation output. Any ideas?
Output I'm looking to achieve:
[
{
"user": {
"_id": "11",
"Name": "Dave",
"DocID": "1",
"DocDisplyName": "ABC"
},
{
"user": {
"_id": "33",
"Name": "Henry",
"DocID": "1",
"DocDisplyName": "ABC",
"BranchDisplayName:"BranchA"
}
}
]
And so on.. So an array of all users and for users that belong to a branch, add the branch display Name to the output.
// Doc 1
{
"_id": "1",
"DisplayName": "ABC",
"Users": [
{ "_id": "11", "Name": "Dave" },
{ "_id": "22", "Name": "Steve" }
],
"Branches": [
{
"_id": "111",
"DisplayName": "BranchA",
"Users": [
{ "_id": "33", "Name": "Henry" },
{ "_id": "44", "Name": "Josh" },
],
},
{
"_id": "222",
"DisplayName": "BranchB",
"Users": [
{ "_id": "55", "Name": "Mark" },
{ "_id": "66", "Name": "Anton" },
],
}
]
}
``Doc 2
{
"_id": "2",
"DisplayName": "DEF",
"Users": [
{ "_id": "77", "Name": "Josh" },
{ "_id": "88", "Name": "Steve" }
],
"Branches": [
{
"_id": "333",
"DisplayName": "BranchA",
"Users": [
{ "_id": "99", "Name": "Henry" },
{ "_id": "10", "Name": "Josh" },
],
},
{
"_id": "444",
"DisplayName": "BranchB",
"Users": [
{ "_id": "112", "Name": "Susan" },
{ "_id": "112", "Name": "Mary" },
],
}
]
}
Collection.aggregate([
{
$addFields: {
branchUsers: {
$reduce: {
input: "$Branches.Users",
initialValue: [],
in: {
$concatArrays: ["$$this", "$$value"],
},
},
},
},
},
{
$addFields: {
user: {
$concatArrays: ["$branchUsers", "$Users"],
},
},
},
{
$addFields: {
"user.DocID": "$_id","user.DocDisaplyName": "$DisplayName"
},
},
{
$unwind: "$user",
},
{
$project: {
_id: 0,
user: 1,
},
}
])
Thanks in advance!
OK I found a solution.
{
$addFields: {
"branchUsers.BranchDisplayName": {
$let: {
vars: {
first: {
$arrayElemAt: [ "$Branches", 0 ]
}
},
in: "$$first.DisplayName"
}
}
}
},
This creates the field only for the users that belong to the branch

Mongo Aggregation using $Max

I have a collection that stores history, i.e. a new document is created every time a change is made to the data, I need to extract fields based on the max value of a date field, however my query keeps returning either all of the dates or requires me to push the fields into an array which make the data hard to analyze for an end-user.
Expected output as CSV:
MAX(DATE), docID, url, type
1579719200216, 12371, www.foodnetwork.com, food
1579719200216, 12371, www.cnn.com, news,
1579719200216, 12371, www.wikipedia.com, info
Sample Doc:
{
"document": {
"revenueGroup": "fn",
"metaDescription": "",
"metaData": {
"audit": {
"lastModified": 1312414124,
"clientId": ""
},
"entities": [],
"docId": 1313943,
"url": ""
},
"rootUrl": "",
"taggedImages": {
"totalSize": 1,
"list": [
{
"image": {
"objectId": "woman-reaching-for-basket",
"caption": "",
"url": "",
"height": 3840,
"width": 5760,
"owner": "Facebook",
"alt": "Woman reaching for basket"
},
"tags": {
"totalSize": 4,
"list": []
}
}
]
},
"title": "The 8 Best Food Items of 2020",
"socialTitle": "The 8 Best Food Items of 2020",
"primaryImage": {
"objectId": "woman-reaching-for-basket.jpg",
"caption": "",
"url": "",
"height": 3840,
"width": 5760,
"owner": "Hero Images / Getty Images",
"alt": "Woman reaching for basket in laundry room"
},
"subheading": "Reduce your footprint with these top-performing diets",
"citations": {
"list": []
},
"docId": 1313943,
"revisionId": "1313943_1579719200216",
"templateType": "LIST",
"documentState": {
"activeDate": 579719200166,
"state": "ACTIVE"
}
},
"url": "",
"items": {
"totalSize": "",
"list": [
{
"type": "recipe",
"data": {
"comInfo": {
"list": [
{
"type": "food",
"id": "https://www.foodnetwork.com"
}
]
},
"type": ""
},
"id": 4,
"uuid": "1313ida-qdad3-42c3-b41d-223q2eq2j"
},
{
"type": "recipe",
"data": {
"comInfo": {
"list": [
{
"type": "news",
"id": "https://www.cnn.com"
},
{
"type": "info",
"id": "https://www.wikipedia.com"
}
]
},
"type": "PRODUCT"
},
"id": 11,
"uuid": "318231jc-da12-4475-8994-283u130d32"
}
]
},
"vertical": "food"
}
Below query:
db.collection.aggregate([
{
$match: {
vertical: "food",
"document.documentState.state": "ACTIVE",
"document.templateType": "LIST"
}
},
{
$unwind: "$document.items"
},
{
$unwind: "$document.items.list"
},
{
$unwind: "$document.items.list.contents"
},
{
$unwind: "$document.items.list.contents.list"
},
{
$match: {
"document.items.list.contents.list.type": "recipe",
"document.revenueGroup": "fn"
}
},
{
$sort: {
"document.revisionId": -1
}
},
{
$group: {
_id: {
_id: {
docId: "$document.docId",
date: {$max: "$document.revisionId"}
},
url: "$document.items.list.contents.list.data.comInfo.list.id",
type: "$document.items.list.contents.list.data.comInfo.list.type"
}
}
},
{
$project: {
_id: 1
}
},
{
$sort: {
"document.items.list.contents.list.id": 1, "document.revisionId": -1
}
}
], {
allowDiskUse: true
})
First of all, you need to go through the documentation of the $group aggregation here.
you should be doing this instead:
{
$group: {
"_id": "$document.docId"
"date": {
$max: "$document.revisionId"
},
"url": {
$first: "$document.items.list.contents.list.data.comInfo.list.id"
},
"type": {
$first:"$document.items.list.contents.list.data.comInfo.list.type"
}
}
}
This will give you the required output.

Combining unique elements of arrays without $unwind

I would like to get the unique elements of all arrays in a collection. Consider the following collection
[
{
"collection": "collection",
"myArray": [
{
"name": "ABC",
"code": "AB"
},
{
"name": "DEF",
"code": "DE"
}
]
},
{
"collection": "collection",
"myArray": [
{
"name": "GHI",
"code": "GH"
},
{
"name": "DEF",
"code": "DE"
}
]
}
]
I can achieve this by using $unwind and $group like this:
db.collection.aggregate([
{
$unwind: "$myArray"
},
{
$group: {
_id: null,
data: {
$addToSet: "$myArray"
}
}
}
])
And get the output:
[
{
"_id": null,
"data": [
{
"code": "GH",
"name": "GHI"
},
{
"code": "DE",
"name": "DEF"
},
{
"code": "AB",
"name": "ABC"
}
]
}
]
However, the array "myArray" will have a lot of elements (about 6) and the number of documents passed into this stage of the pipeline will be about 600. So unwinding the array would give me a total of 3600 documents being processed. I would like to know if there's a way for me to achieve the same result without unwinding
You can use below aggregation
db.collection.aggregate([
{ "$group": {
"_id": null,
"data": { "$push": "$myArray" }
}},
{ "$project": {
"data": {
"$reduce": {
"input": "$data",
"initialValue": [],
"in": { "$setUnion": ["$$this", "$$value"] }
}
}
}}
])
Output
[
{
"_id": null,
"data": [
{
"code": "AB",
"name": "ABC"
},
{
"code": "DE",
"name": "DEF"
},
{
"code": "GH",
"name": "GHI"
}
]
}
]

Need to return matched data from mongo db JSON

I have Json which have values like state_city details this contains information like which city belongs to which state -
Need to query it for particular state name which will gives me all cities that belongs to that state.
db.collection.find({
"count": 10,
"state.name": "MP"
})
[
{
"collection": "collection1",
"count": 10,
"state": [
{
"name": "MH",
"city": "Mumbai"
},
{
"name": "MH",
"city": "Pune"
},
{
"name": "UP",
"city": "Kanpur"
},
{
"name": "CG",
"city": "Raipur"
}
]
},
{
"collection": "collection2",
"count": 20,
"state": [
{
"name": "MP",
"city": "Indore"
},
{
"name": "MH",
"city": "Bhopal"
},
{
"name": "UP",
"city": "Kanpur"
},
{
"name": "CG",
"city": "Raipur"
}
]
}
]
You have to use aggregate query to get only matching elements in array :
db.collection.aggregate([{
$unwind: "$content.state"
},
{
$match: {
"content.state.name": "MH",
"count": 10
}
},
{
$group: {
_id: "$content.state.city",
}
},
{
$addFields: {
key: 1
}
},
{
$group: {
_id: "$key",
cities: {
$push: "$_id"
}
}
},
{
$project: {
_id: 0,
cities: 1
}
}
])
This query will return :
{
"cities": [
"Pune",
"Mumbai"
]
}
The following query would be the solution.
db.collection.find({ "count": 10, "state":{"name": "MP"}})
For more complex queries, $elemMatch is also available.