Return union of embedded arrays - mongodb

I have a collection of documents, each of which has an array. I want to get a single array which is the union of the documents' embedded arrays.
This is my collection:
{
"_id": ObjectId("...."),
"filter": "a",
"images": [
{
"_id": ObjectId("...."),
"file": "file_a_1.jpg"
},
{
"_id": ObjectId("...."),
"file": "file_a_2.jpg"
}
]
},
{
"_id": ObjectId("...."),
"filter": "b",
"images": [
{
"_id": ObjectId("...."),
"file": "file_b_3.jpg"
},
{
"_id": ObjectId("...."),
"file": "file_b_4.jpg"
}
]
},
{
"_id": ObjectId("...."),
"filter": "a",
"images": [
{
"_id": ObjectId("...."),
"file": "file_a_5.jpg"
},
{
"_id": ObjectId("...."),
"file": "file_a_6.jpg"
}
]
}
And I would like to get the embedded arrays of the documents with filter = "a" for example.
{
"_id": ObjectId("...."),
"file": "file_a_1.jpg"
},
{
"_id": ObjectId("...."),
"file": "file_a_2.jpg"
},
{
"_id": ObjectId("...."),
"file": "file_a_5.jpg"
},
{
"_id": ObjectId("...."),
"file": "file_a_6.jpg"
}
And I would like to be able to limit the size of the returned array and the offset.

You need to unwind the array, then project to get the new document shape; $skip and $limit then apply the offset and the size limit.
db.david1.aggregate([{
$match : {
"filter" : "a"
}
}, {
$unwind : "$images"
}, {
$project : {
_id : "$images._id",
file : "$images.file"
}
}, {
$skip : 2
}, {
$limit : 1
}
]).toArray()

Related

I am having difficulty querying the following nested document using pymongo

If these are the following nested documents
[
{
"_id": 5,
"name": "Wilburn Spiess",
"scores": [
{
"score": 44.87186330181261,
"type": "exam"
},
{
"score": 25.72395114668016,
"type": "quiz"
},
{
"score": 63.42288310628662,
"type": "homework"
}
]
},
{
"_id": 6,
"name": "Jenette Flanders",
"scores": [
{
"score": 37.32285459166097,
"type": "exam"
},
{
"score": 28.32634976913737,
"type": "quiz"
},
{
"score": 81.57115318686338,
"type": "homework"
}
]
},
{
"_id": 7,
"name": "Salena Olmos",
"scores": [
{
"score": 90.37826509157176,
"type": "exam"
},
{
"score": 42.48780666956811,
"type": "quiz"
},
{
"score": 96.52986171633331,
"type": "homework"
}
]
}
]
I need to access the score of the entry where 'type' is 'exam'.
Can somebody help me with this?
If you're asking for a python program to access the score, you can print them out like:
collection = mongo_connection['db']['collection']
documents = collection.find({})
for doc in documents:
for score in doc['scores']:
if score['type'] == 'exam':
print(f'Score: {score["score"]}')
If you are trying to retrieve only the scores and ignore the rest, I'd do an $unwind on the scores, $match on the type, and then project the fields you want (or not).
db.test.aggregate([
{
$unwind: '$scores'
},
{
$match: {
'scores.type': 'exam'
}
},
{
$project: {
'name': '$name',
'score': '$scores.score'
}
}
])
This would output:
{
"_id" : 5,
"name" : "Wilburn Spiess",
"score" : 44.8718633018126
},
{
"_id" : 6,
"name" : "Jenette Flanders",
"score" : 37.322854591661
},
{
"_id" : 7,
"name" : "Salena Olmos",
"score" : 90.3782650915718
}

Adding a nested value as a field - MongoDB aggregation

So I have a parent document with users, as well as an array that has users too. I want to add the DisplayName from the nested users array to the aggregation output. Any ideas?
Output I'm looking to achieve:
[
{
"user": {
"_id": "11",
"Name": "Dave",
"DocID": "1",
"DocDisplyName": "ABC"
},
{
"user": {
"_id": "33",
"Name": "Henry",
"DocID": "1",
"DocDisplyName": "ABC",
"BranchDisplayName":"BranchA"
}
}
]
And so on. So the result is an array of all users, and for users that belong to a branch, the branch display name is added to the output.
// Doc 1
{
"_id": "1",
"DisplayName": "ABC",
"Users": [
{ "_id": "11", "Name": "Dave" },
{ "_id": "22", "Name": "Steve" }
],
"Branches": [
{
"_id": "111",
"DisplayName": "BranchA",
"Users": [
{ "_id": "33", "Name": "Henry" },
{ "_id": "44", "Name": "Josh" },
],
},
{
"_id": "222",
"DisplayName": "BranchB",
"Users": [
{ "_id": "55", "Name": "Mark" },
{ "_id": "66", "Name": "Anton" },
],
}
]
}
// Doc 2
{
"_id": "2",
"DisplayName": "DEF",
"Users": [
{ "_id": "77", "Name": "Josh" },
{ "_id": "88", "Name": "Steve" }
],
"Branches": [
{
"_id": "333",
"DisplayName": "BranchA",
"Users": [
{ "_id": "99", "Name": "Henry" },
{ "_id": "10", "Name": "Josh" },
],
},
{
"_id": "444",
"DisplayName": "BranchB",
"Users": [
{ "_id": "112", "Name": "Susan" },
{ "_id": "112", "Name": "Mary" },
],
}
]
}
Collection.aggregate([
{
$addFields: {
branchUsers: {
$reduce: {
input: "$Branches.Users",
initialValue: [],
in: {
$concatArrays: ["$$this", "$$value"],
},
},
},
},
},
{
$addFields: {
user: {
$concatArrays: ["$branchUsers", "$Users"],
},
},
},
{
$addFields: {
"user.DocID": "$_id","user.DocDisaplyName": "$DisplayName"
},
},
{
$unwind: "$user",
},
{
$project: {
_id: 0,
user: 1,
},
}
])
Thanks in advance!
OK I found a solution.
{
$addFields: {
"branchUsers.BranchDisplayName": {
$let: {
vars: {
first: {
$arrayElemAt: [ "$Branches", 0 ]
}
},
in: "$$first.DisplayName"
}
}
}
},
This creates the field only for the users that belong to the branch

Group by an optional field in mongodb

I would like to independently group the results of an or clause, including overlap. The data set is rather large so running 2 queries sequentially will result in an undesirable wait time. I am hoping I can somehow project which clause returned the corresponding data. Given this data set:
[
{
"_id": 1,
"item": "abc",
"name": "Michael",
"price": NumberDecimal("10"),
"quantity": NumberInt("2"),
"date": ISODate("2014-03-01T08:00:00Z")
},
{
"_id": 2,
"item": "jkl",
"name": "Toby",
"price": NumberDecimal("20"),
"quantity": NumberInt("1"),
"date": ISODate("2014-03-01T09:00:00Z")
},
{
"_id": 3,
"item": "xyz",
"name": "Keith",
"price": NumberDecimal("5"),
"quantity": NumberInt("10"),
"date": ISODate("2014-03-15T09:00:00Z")
},
{
"_id": 4,
"item": "abc",
"name": "Dwight",
"price": NumberDecimal("5"),
"quantity": NumberInt("20"),
"date": ISODate("2014-04-04T11:21:39.736Z")
},
{
"_id": 5,
"item": "abc",
"name": "Ryan",
"price": NumberDecimal("10"),
"quantity": NumberInt("10"),
"date": ISODate("2014-04-04T21:23:13.331Z")
},
{
"_id": 6,
"item": "def",
"name": "Jim",
"price": NumberDecimal("7.5"),
"quantity": NumberInt("5"),
"date": ISODate("2015-06-04T05:08:13Z")
},
{
"_id": 7,
"item": "abc",
"name": "Keith",
"price": NumberDecimal("7.5"),
"quantity": NumberInt("10"),
"date": ISODate("2015-09-10T08:43:00Z")
},
{
"_id": 8,
"item": "abc",
"name": "Michael",
"price": NumberDecimal("10"),
"quantity": NumberInt("5"),
"date": ISODate("2016-02-06T20:20:13Z")
},
]
I would like to receive this result:
[{
"_id": {
"name": "Keith"
},
"count": 2
},
{
"_id": {
"item": "abc",
},
"count": 5
}]
Here is what I have tried so far:
db.collection.aggregate([
{
$match: {
$or: [
{
item: "abc"
},
{
name: "Keith"
}
]
}
},
{
$group: {
_id: {
item: "$item",
name: "$name"
},
count: {
$sum: 1
}
}
}
])
You can use $facet to get multiple aggregation pipelines into the same stage in this way:
With $facet there are two "outputs": one grouped by name and the other grouped by item.
In each one there are multiple stages:
First $match to process only documents you want.
Then $group with _id set to name or item, using $sum to count the total.
db.collection.aggregate([
{
"$facet": {
"groupByName": [
{
"$match": {"name": "Keith"}
},
{
"$group": {"_id": "$name","count": {"$sum": 1}}
}
],
"groupByItem": [
{
"$match": {"item": "abc"}
},
{
"$group": {"_id": "$item","count": {"$sum": 1}}
}
]
}
}
])
Example here
The output is:
{
"groupByItem": [
{
"_id": "abc",
"count": 5
}
],
"groupByName": [
{
"_id": "Keith",
"count": 2
}
]
}
Here it is:
mongos> db.n.aggregate([ { $facet:{ names:[ {$match:{name:"Keith"}} , {$group:{_id:{name:"$name"}, count:{$sum:1}}} ] , items:[ {$match:{item:"abc"}},{ $group:{_id:{item:"$item"}, count:{$sum:1}} } ] } } , {$project:{ "namesANDitems":{$concatArrays:[ "$names","$items" ]} }} ,{$unwind:"$namesANDitems"} ,{$replaceRoot:{newRoot:"$namesANDitems"} } ]).pretty()
{ "_id" : { "name" : "Keith" }, "count" : 2 }
{ "_id" : { "item" : "abc" }, "count" : 5 }
mongos>
explained:
You create two pipes via $facet
In each facet pipe, match the documents you need and then group them (pipe1 = names, pipe2 = items)
Join the arrays from the two pipes in single array named "namesANDitems"
Split the array into one document per element with $unwind
Use $replaceRoot to drop the temporary wrapper field namesANDitems, so you have only the two objects as requested

mongodb distinct query values

I have the following mongodb documents:
{
"_id": "",
"name": "example1",
"colors": [
{
"id": 1000000,
"properties": [
{
"id": "1000",
"name": "",
"value": "green"
},
{
"id": "2000",
"name": "",
"value": "circle"
}
]
} ]
}
{
"_id": "",
"name": "example2",
"colors": [
{
"id": 1000000,
"properties": [
{
"id": "1000",
"name": "",
"value": "red"
},
{
"id": "4000",
"name": "",
"value": "box"
}
]
} ]
}
I would like to get the distinct values of the value field in the array where id=1000
db.getCollection('product').distinct('colors.properties.value', {'colors.properties.id':{'$eq': 1000}})
but it returns all values in the array.
The expected Result would be:
["green", "red"]
There are many ways to do this. Here is one:
$match eliminates unwanted data
$unwind deconstructs the array
$addToSet in $group gives the distinct data
The mongo script :
db.collection.aggregate([
{
$match: {
"colors.properties.id": "1000"
}
},
{
"$unwind": "$colors"
},
{
"$unwind": "$colors.properties"
},
{
$match: {
"colors.properties.id": "1000"
}
},
{
$group: {
_id: null,
distinctData: {
$addToSet: "$colors.properties.value"
}
}
}
])
Working Mongo playground

Combining unique elements of arrays without $unwind

I would like to get the unique elements of all arrays in a collection. Consider the following collection
[
{
"collection": "collection",
"myArray": [
{
"name": "ABC",
"code": "AB"
},
{
"name": "DEF",
"code": "DE"
}
]
},
{
"collection": "collection",
"myArray": [
{
"name": "GHI",
"code": "GH"
},
{
"name": "DEF",
"code": "DE"
}
]
}
]
I can achieve this by using $unwind and $group like this:
db.collection.aggregate([
{
$unwind: "$myArray"
},
{
$group: {
_id: null,
data: {
$addToSet: "$myArray"
}
}
}
])
And get the output:
[
{
"_id": null,
"data": [
{
"code": "GH",
"name": "GHI"
},
{
"code": "DE",
"name": "DEF"
},
{
"code": "AB",
"name": "ABC"
}
]
}
]
However, the array "myArray" will have a lot of elements (about 6) and the number of documents passed into this stage of the pipeline will be about 600. So unwinding the array would give me a total of 3600 documents being processed. I would like to know if there's a way for me to achieve the same result without unwinding
You can use the aggregation below:
db.collection.aggregate([
{ "$group": {
"_id": null,
"data": { "$push": "$myArray" }
}},
{ "$project": {
"data": {
"$reduce": {
"input": "$data",
"initialValue": [],
"in": { "$setUnion": ["$$this", "$$value"] }
}
}
}}
])
Output
[
{
"_id": null,
"data": [
{
"code": "AB",
"name": "ABC"
},
{
"code": "DE",
"name": "DEF"
},
{
"code": "GH",
"name": "GHI"
}
]
}
]