Combining unique elements of arrays without $unwind - mongodb

I would like to get the unique elements of all arrays in a collection. Consider the following collection
[
{
"collection": "collection",
"myArray": [
{
"name": "ABC",
"code": "AB"
},
{
"name": "DEF",
"code": "DE"
}
]
},
{
"collection": "collection",
"myArray": [
{
"name": "GHI",
"code": "GH"
},
{
"name": "DEF",
"code": "DE"
}
]
}
]
I can achieve this by using $unwind and $group like this:
db.collection.aggregate([
{
$unwind: "$myArray"
},
{
$group: {
_id: null,
data: {
$addToSet: "$myArray"
}
}
}
])
And get the output:
[
{
"_id": null,
"data": [
{
"code": "GH",
"name": "GHI"
},
{
"code": "DE",
"name": "DEF"
},
{
"code": "AB",
"name": "ABC"
}
]
}
]
However, the array "myArray" will have a lot of elements (about 6) and the number of documents passed into this stage of the pipeline will be about 600. So unwinding the array would give me a total of 3600 documents being processed. I would like to know if there's a way for me to achieve the same result without unwinding

You can use below aggregation
db.collection.aggregate([
{ "$group": {
"_id": null,
"data": { "$push": "$myArray" }
}},
{ "$project": {
"data": {
"$reduce": {
"input": "$data",
"initialValue": [],
"in": { "$setUnion": ["$$this", "$$value"] }
}
}
}}
])
Output
[
{
"_id": null,
"data": [
{
"code": "AB",
"name": "ABC"
},
{
"code": "DE",
"name": "DEF"
},
{
"code": "GH",
"name": "GHI"
}
]
}
]

Related

mongodb distinct query values

I have the following mongodb documents:
{
"_id": "",
"name": "example1",
"colors": [
{
"id": 1000000,
"properties": [
{
"id": "1000",
"name": "",
"value": "green"
},
{
"id": "2000",
"name": "",
"value": "circle"
}
]
} ]
}
{
"_id": "",
"name": "example2",
"colors": [
{
"id": 1000000,
"properties": [
{
"id": "1000",
"name": "",
"value": "red"
},
{
"id": "4000",
"name": "",
"value": "box"
}
]
} ]
}
I would like to get distinct queries on the value field in the array where id=1000
db.getCollection('product').distinct('colors.properties.value', {'colors.properties.id':{'$eq': 1000}})
but it returns all values in the array.
The expected Result would be:
["green", "red"]
There are a lot of way to do.
$match eliminates unwanted data
$unwind de-structure the array
$addToSet in $group gives the distinct data
The mongo script :
db.collection.aggregate([
{
$match: {
"colors.properties.id": "1000"
}
},
{
"$unwind": "$colors"
},
{
"$unwind": "$colors.properties"
},
{
$match: {
"colors.properties.id": "1000"
}
},
{
$group: {
_id: null,
distinctData: {
$addToSet: "$colors.properties.value"
}
}
}
])
Working Mongo playground

Get the $size (length) of a nested array and calculate the difference to a stored value on the parent object - using aggregate

Let's consider that I have the following documents (ignoring the _id):
[
{
"Id": "Store1",
"Info": {
"Location": "Store1 Street",
"PhoneNumber": 111
},
"MaxItemsPerShelf": 3,
"Shelf": [
{
"Id": "Shelf1",
"Items": [
{
"Id": "Item1",
"Name": "bananas"
},
{
"Id": "Item2",
"Name": "apples"
},
{
"Id": "Item3",
"Name": "oranges"
}
]
},
{
"Id": "Shelf2",
"Items": [
{
"Id": "Item4",
"Name": "cookies"
},
{
"Id": "Item5",
"Name": "chocolate"
}
]
},
{
"Id": "Shelf3",
"Items": []
}
]
},
{
"Id": "Store3",
"Info": {
"Location": "Store2 Street",
"PhoneNumber": 222
},
"MaxItemsPerShelf": 2,
"Shelf": [
{
"Id": "Shelf4",
"Items": [
{
"Id": "Item6",
"Name": "champoo"
},
{
"Id": "Item7",
"Name": "toothpaste"
}
]
},
{
"Id": "Shelf5",
"Items": [
{
"Id": "Item8",
"Name": "chicken"
}
]
}
]
}
]
Given a specific Shelf.Id I want to get the following result ( Shelf.Id = "Shelf2"):
[{
"Info": {
"Location": "Store1 Street",
"PhoneNumber": 111
},
"ItemsNumber": 2,
"ItemsRemaining": 1
}]
Therefore:
ItemsNumberis the $size of Shelf
and
ItemsRemainingis equal to MaxItemsPerShelf $size of Shelf
also I want to copy the value of the Info to the aggregate output.
How can I accomplish this with aggregate? On my efforts I couldn't pass through an iterator that gets the $size of $Shelf.Items
You can use below aggregation
db.collection.aggregate([
{ "$match": { "Shelf.Id": "Shelf2" }},
{ "$replaceRoot": {
"newRoot": {
"$let": {
"vars": {
"shelf": {
"$filter": {
"input": {
"$map": {
"input": "$Shelf",
"in": {
"Id": "$$this.Id",
"count": { "$size": "$$this.Items" }
}
}
},
"as": "ss",
"cond": { "$eq": ["$$ss.Id", "Shelf2"] }
}
}
},
"in": {
"Info": "$Info",
"ItemsNumber": { "$arrayElemAt": ["$$shelf.count", 0] },
"ItemsRemaining": {
"$subtract": [
"$MaxItemsPerShelf",
{ "$ifNull": [
{ "$arrayElemAt": ["$$shelf.count", 0] },
0
]}
]
}
}
}
}
}}
])

Aggregate nested arrays

I have multiple documents, and I'm trying to aggregate all documents with companyId = xxx and return one array with all the statuses.
So it will look like this:
[
{
"status": "created",
"date": "2019-03-16T10:59:59.200Z"
},
{
"status": "completed",
"date": "2019-03-16T11:00:37.750Z"
},
{
"status": "created",
"date": "2019-03-16T10:59:59.200Z"
},
{
"status": "completed",
"date": "2019-03-16T11:00:37.750Z"
},
{
"status": "created",
"date": "2019-03-16T10:59:59.200Z"
},
{
"status": "completed",
"date": "2019-03-16T11:00:37.750Z"
},
{
"status": "created",
"date": "2019-03-16T10:59:59.200Z"
},
{
"status": "completed",
"date": "2019-03-16T11:00:37.750Z"
}
]
The document look like this:
[
{
"companyId": "xxx",
"position": "",
"section": "",
"comment": "",
"items": [
{
"any": "111",
"name": "some name",
"description": "some description",
"version": "3",
"status": [
{
"status": "created",
"date": "2019-03-16T10:59:59.200Z"
},
{
"status": "completed",
"date": "2019-03-16T11:00:37.750Z"
}
]
},
{
"any": "222",
"name": "some name",
"description": "some description",
"version": "3",
"status": [
{
"status": "created",
"date": "2019-03-16T10:59:59.200Z"
},
{
"status": "completed",
"date": "2019-03-16T11:00:37.750Z"
}
]
}
]
},
{
"companyId": "xxx",
"position": "",
"section": "",
"comment": "",
"items": [
{
"any": "111",
"name": "some name",
"description": "some description",
"version": "3",
"status": [
{
"status": "created",
"date": "2019-03-16T10:59:59.200Z"
},
{
"status": "completed",
"date": "2019-03-16T11:00:37.750Z"
}
]
},
{
"any": "222",
"name": "some name",
"description": "some description",
"version": "3",
"status": [
{
"status": "created",
"date": "2019-03-16T10:59:59.200Z"
},
{
"status": "completed",
"date": "2019-03-16T11:00:37.750Z"
}
]
}
]
}
]
Any suggestion, how to implement this?
Then I want to loop over the array (in code) and count how many items in status created, and completed. maybe it could be done with the query?
Thanks in advance
You can use below aggregation:
db.col.aggregate([
{
$match: { companyId: "xxx" }
},
{
$unwind: "$items"
},
{
$unwind: "$items.status"
},
{
$replaceRoot: {
newRoot: "$items.status"
}
},
{
$group: {
_id: "$status",
count: { $sum: 1 }
}
}
])
Double $unwind will return single status per document and then you can use $replaceRoot to promote each status to root level of your document.
Additionally you can add $group stage to count documents by status.
In addition to the #mickl answer, you can add $project pipeline to get the result as a flat list of status and count.
db.collectionName.aggregate([
{
$match: { companyId: "xxx" }
},
{
$unwind: "$items"
},
{
$unwind: "$items.status"
},
{
$replaceRoot: {
newRoot: "$items.status"
}
},
{
$group: {
_id: "$status",
count: { $sum: 1 }
}
},
{
$project: {
"status":"$_id",
"count":1,
_id:0
}
}
])
If the number of documents on which you are executing the above query is too much then you should avoid using $unwind in the initial stage of aggregation pipeline.
Either you should use $project after $match to reduce the selection of fields or you can use below query:
db.col.aggregate([
{
$match: {
companyId: "xxx"
}
},
{
$project: {
_id: 0,
data: {
$reduce: {
input: "$items.status",
initialValue: [
],
in: {
$concatArrays: [
"$$this",
"$$value"
]
}
}
}
}
},
{
$unwind: "$data"
},
{
$replaceRoot: {
newRoot: "$data"
}
}
])

Mongodb 3.2 (Aggregation) : Group by multiple values of the same attribute

I have the following count queries :
db.collection.count({code : {$in : ['delta', 'beta']}});
db.collection.count({code : 'alpha'});
for the given data set
[
{
"code": "detla",
"name": "delta1"
},
{
"code": "detla",
"name": "delta2"
},
{
"code": "detla",
"name": "delta3"
},
{
"code": "detla",
"name": "delta4"
},
{
"code": "beta",
"name": "beta1"
},
{
"code": "beta",
"name": "beta2"
},
{
"code": "beta",
"name": "beta3"
},
{
"code": "beta",
"name": "beta4"
},
{
"code": "beta",
"name": "beta5"
},
{
"code": "alpha",
"name": "alpha1"
},
{
"code": "alpha",
"name": "alpha2"
},
{
"code": "alpha",
"name": "alpha3"
}
]
Is there any way to achieve this using single aggregation() and nested $group in mongodb#3.2?
I understand that this is a classic usecase for mapReduce(), but I am out of option here as the $group by parameters (code in the above example), is a dynamically generated attribute, hence the aggregation stages building is also dynamic.
Expected output is same as the result of the two count queries.
-> First count query (delta, beta combined count) -> 9
-> Second count query (alpha count) -> 3
You can use below $group aggregation
db.collection.aggregate([
{ "$group": {
"_id": null,
"first": {
"$sum": {
"$cond": [{ "$in": ["$code", ["detla", "beta"]] }, 1, 0]
}
},
"second": {
"$sum": {
"$cond": [{ "$eq": ["alpha", "$code"] }, 1, 0]
}
}
}}
])
But I will prefer to go with the two count queries for the better performance
$in is also released in version 3.4. You can either use $setIsSubset for the prior versions
db.collection.aggregate([
{ "$group": {
"_id": null,
"first": {
"$sum": {
"$cond": [{ "$setIsSubset": [["$code"], ["detla", "beta"]] }, 1, 0]
}
},
"second": {
"$sum": {
"$cond": [{ "$eq": ["alpha", "$code"] }, 1, 0]
}
}
}}
])

MongoDB projection. Operator $add field|expression array awareness or after $slice

I've got collection that looks like:
[{
"org": "A",
"type": "simple",
"payFor": 3,
"price": 100
},
{
"org": "A",
"type": "custom",
"payFor": 2,
"price": 115
},
{
"org": "B",
"type": "simple",
"payFor": 1,
"price": 110
},
{
"org": "B",
"type": "custom",
"payFor": 2,
"price": 200
},
{
"org": "B",
"type": "custom",
"payFor": 4,
"price": 220
}]
And need to produce result with query to perform group by "org" where payments appears for only first "payFor" prices in "type".
I'm trying to use expression result by $slice operator in $add but this is not works.
pipeline:
[{
"$group": {
"_id": {
"org": "$org",
"type": "$type"
},
"payFor": {
"$max": "$payFor"
},
"count": {
"$sum": 1
},
"prices": {
"$push": "$price"
}
}
},
{
"$group": {
"_id": "$_id.org",
"payments": {
"$push": {
"type": "$_id.type",
"forFirst": "$payFor",
"sum": {
"$cond": [
{
"$gte": [
"$payFor",
"$count"
]
},
{
"$add": {
"$prices": {
"$slice": "$count"
}
}
},
{
"$add": "$prices"
}
]
}
}
}
}
}]
I know that it is possible to traverse unwinded prices and pick only "payFor" count of them. but result collections are more rich than in example above and this operation will produce some unecessary overheads.
Need some advice from community. Please. Thanks.