Fetch distinct values from Mongo DB nested array and output to a single array - mongodb

Given below is my data in MongoDB. I want to fetch all the unique ids from the field articles, which is nested under the jnlc_subjects array field. The result should contain only the articles array with distinct ObjectIds.
Mongo Data
{
"_id" : ObjectId("5c9216f1a21a4a31e0c7fa56"),
"jnlc_journal_category" : "Biology",
"jnlc_subjects" : [
{
"subject" : "Conservation Biology",
"views" : "123",
"articles" : [
ObjectId("5c4e93d0135edb6812200d5f"),
ObjectId("5c4e9365135edb6a12200d60"),
ObjectId("5c4e93a8135edb6912200d61")
]
},
{
"subject" : "Micro Biology",
"views" : "20",
"articles" : [
ObjectId("5c4e9365135edb6a12200d60"),
ObjectId("5c4e93d0135edb6812200d5f"),
ObjectId("5c76323fbaaccf5e0bae7600"),
ObjectId("5ca33ce19d677bf780fc4995")
]
},
{
"subject" : "Marine Biology",
"views" : "8",
"articles" : [
ObjectId("5c4e93d0135edb6812200d5f")
]
}
]
}
Required result
I want to get output in following format
articles : [
ObjectId("5c4e9365135edb6a12200d60"),
ObjectId("5c4e93a8135edb6912200d61"),
ObjectId("5c76323fbaaccf5e0bae7600"),
ObjectId("5ca33ce19d677bf780fc4995"),
ObjectId("5c4e93d0135edb6812200d5f")
]

Try as below:
// Collect every distinct article id found under jnlc_subjects[].articles[].
db.collection.aggregate([
{
// Emit one document per element of the jnlc_subjects array.
$unwind: "$jnlc_subjects"
},
{
// Emit one document per article id inside each subject.
$unwind: "$jnlc_subjects.articles"
},
// _id: null puts all unwound documents into one group; $addToSet keeps each id once (element order is unspecified).
{ $group: {_id: null, uniqueValues: { $addToSet: "$jnlc_subjects.articles"}} }
])
Result:
{
"_id" : null,
"uniqueValues" : [
ObjectId("5ca33ce19d677bf780fc4995"),
ObjectId("5c4e9365135edb6a12200d60"),
ObjectId("5c4e93a8135edb6912200d61"),
ObjectId("5c4e93d0135edb6812200d5f"),
ObjectId("5c76323fbaaccf5e0bae7600")
]
}

Try with this
// Build a set of distinct article ids per source document.
db.collection.aggregate([
{
$unwind:{
path:"$jnlc_subjects",
// Keep documents whose jnlc_subjects is missing, null or an empty array.
preserveNullAndEmptyArrays:true
}
},
{
$unwind:{
path:"$jnlc_subjects.articles",
// Keep subjects whose articles array is missing, null or empty.
preserveNullAndEmptyArrays:true
}
},
{
$group:{
// Grouping by the original _id yields one result document per source document.
_id:"$_id",
articles:{
// $addToSet stores each article id only once.
$addToSet:"$jnlc_subjects.articles"
}
}
}
])
If you don't want to $group by _id, you can use null instead of "$_id".

Based on the description in the question above, please try executing the following aggregate operation as a solution.
// Return only the distinct article ids of one specific document.
db.collection.aggregate(
// Pipeline
[
// Stage 1: restrict the pipeline to the single document with this _id.
{
$match: {
"_id": ObjectId("5c9216f1a21a4a31e0c7fa56")
}
},
// Stage 2: one document per element of jnlc_subjects.
{
$unwind: {
path: "$jnlc_subjects",
}
},
// Stage 3: one document per article id inside each subject.
{
$unwind: {
path: "$jnlc_subjects.articles"
}
},
// Stage 4: a single group (_id: null) whose articles set holds each id once.
{
$group: {
_id: null,
articles: {
$addToSet: '$jnlc_subjects.articles'
}
}
},
// Stage 5: output only the articles array and suppress _id.
{
$project: {
articles: 1,
_id: 0
}
},
]
);

Related

writing aggregate MongoDB query to calculate field count ids

I'm writing an aggregate query for the following records and output.
Data:
[
{
"_id" : ObjectId("5f3b2626927b18001db86884"),
"collections" : [
Art, Craft
]
},{
"_id" : ObjectId("5f3b2626927b18001db86885"),
"collections" : [
Craft
]
},{
"_id" : ObjectId("5f3b2626927b18001db86886"),
"collections" : [
Apex, Art
]
},
...
]
Expected Output:
count of collections id
{
Art : 2,
Craft : 2,
Apex : 1
}
Right now, we are looping through the collection to calculate the count for each collection name, as in the desired output, but the performance is poor because this collection consists of tens of thousands of records.
So, I was thinking to build an aggregate query and if someone can help me to start or point towards a right direction that would be really appreciated. Thank you.
$unwind
$group
$group
$replaceRoot
// Count how many documents reference each collection name and return the counts as one object.
db.collection.aggregate([
{
// One document per entry of the collections array.
$unwind: "$collections"
},
{
// Count occurrences of each distinct collection name.
"$group": {
"_id": "$collections",
"v": {
"$sum": 1
}
}
},
{
// Gather all counts into a single document.
"$group": {
"_id": null,
"collections": {
"$push": {
// Turn each { k: name, v: count } pair into a one-key object such as { Art: 2 }.
$arrayToObject: [
[ { "k": "$$ROOT._id", "v": "$$ROOT.v" } ]
]
}
}
}
},
{
// Merge the one-key objects into a single object and make it the output document.
$replaceRoot: {
newRoot: {
$mergeObjects: "$collections"
}
}
}
])
mongoplayground
I have figured a solution after checking for a while.
// For each joined collection record, list the ID values of the documents that reference it and count them.
// NOTE(review): "from_collection", "localField" and "foreignField" look like placeholders; substitute the real names.
db.getCollection("collectionName").aggregate(
[
// get all the records with at least one collection name
{
$match: {
"collections.0": { $exists: true }
}
},
// populate the collection record (the joined documents overwrite the collections field)
{
$lookup: {
from: "from_collection",
localField: "localField",
foreignField: "foreignField",
as: "collections"
}
},
// unwind: one document per joined collection record
{ $unwind: "$collections" },
// group by the collections._id, pushing the ID field of every unwound document
{ $group: { _id: "$collections._id", collections: { $push: "$$ROOT.ID" } } },
// project with collection contains _id, and count ($size of the pushed array)
{
$project : {
collections: "$collections",
count: { $size: "$collections" }
}
}
]
).toArray();
output:
[
{
"_id" : ObjectId("61c4c42d68579f00311dd3e1"),
"collections" : [
"015151",
"015152",
"015153"
],
"count" : 3.0
},
{
"_id" : ObjectId("615f38016f40710033699939"),
"collections" : [
"014871"
],
"count" : 1.0
},
{
"_id" : ObjectId("611fed5ee0d12c00337cb009"),
"collections" : [
"014788",
"014786",
"014789",
"014787",
"014884",
"014893",
"014967",
"014968",
"015016",
"015017"
],
"count" : 10.0
}
...
]

I have these two collections, namely Inward and Outward. Both collections have similar embedded sub-documents containing product, batch and quantity fields.

Inward collections
{"ord" : 1,
"products" : [
{
"name" : "apple",
"qty" : "10",
"batch" : "jun-2021"
},
{
"name" : "banana",
"qty" : 20,
"batch" : "jan-2021"
}
]
}
outward collections
{
"_id" : ObjectId("5edde5487957d9efea972a74"),
"inv" : 1,
"products" : [
{
"name" : "apple",
"qty" : 13,
"batch" : "jun-2021"
}
]
}
Now, I would like to perform an actual stock quantity check for a particular product and batch (grouped together) across both collections.
You may try this way:
We join them with inward.ord = outward.inv condition.
Flatten products field.
Group by product's name and batch to sum qty value.
// Sum qty per (batch, name) over the products of matching inward and outward documents.
db.inward.aggregate([
{
$lookup: {
from: "outward",
// Expose the inward document's ord and products to the sub-pipeline as $$ord / $$products.
let: {
ord: "$ord",
products: "$products"
},
pipeline: [
{
// Join condition: inward.ord = outward.inv.
$match: {
$expr: {
$eq: [ "$$ord", "$inv" ]
}
}
},
{
// Combine the inward products with the matched outward products into one array.
$project: {
products: {
$concatArrays: [
"$$products",
"$products"
]
}
}
},
{
// One document per product.
$unwind: "$products"
},
{
// Promote each product sub-document to the top level.
$replaceWith: "$products"
}
],
as: "products"
}
},
{
// NOTE(review): an inward document with no matching outward document gets an empty products array and is dropped here.
$unwind: "$products"
},
{
$group: {
_id: {
batch: "$products.batch",
name: "$products.name"
},
qty: {
// $sum ignores non-numeric values, so a string qty such as "10" in the sample data contributes nothing.
$sum: "$products.qty"
}
}
}
])
MongoPlayground
Note: You need MongoDB v4.2 or later (the $replaceWith stage was introduced in 4.2).

How to get an object from a 3-level nested array in a Mongo query?

Basically the structure is :
{
"_id" : ObjectId("123123"),
"stores" : [
{
"messages" : [
{
"updated_time" : "2018-05-15T05:12:25+0000",
"message_count" : 4,
"thread_id" : "123",
"messages" : [
{
"message" : "Hi User ",
"created_time" : "2018-05-15T05:12:25+0000",
"message_id" : "111",
},
{
"message" : "This is tes",
"created_time" : "2018-05-15T05:12:21+0000",
"message_id" : "222",
}
]
},
],
"store_id" : "123"
}
]
}
I have the following values and want to get the message object whose message_id is 111. How can I get this object? Any idea or help will be appreciated. Thanks.
store_id: 123,
thread_id:123,
message_id:111
The simplest way would be to $unwind all the nested arrays and then use $match to get single document. You can also add $replaceRoot to get only nested document. Try:
// Flatten all three array levels, then pick the single message identified by store, thread and message id.
db.collection.aggregate([
// One document per store.
{ $unwind: "$stores" },
// One document per thread (the outer messages array).
{ $unwind: "$stores.messages" },
// One document per individual message (the inner messages array).
{ $unwind: "$stores.messages.messages" },
// Keep only the message matching all three identifiers.
{ $match: { "stores.store_id": "123", "stores.messages.thread_id": "123", "stores.messages.messages.message_id": "111" } },
// Return just the nested message sub-document.
{ $replaceRoot: { newRoot: "$stores.messages.messages" } }
])
Prints:
{
"created_time": "2018-05-15T05:12:25+0000",
"message": "Hi User ",
"message_id": "111"
}
To improve the performance you can use $match after every $unwind to filter out unnecessary data as soon as possible, try:
// Same lookup as above, but filtering after every $unwind so later stages see fewer documents.
db.collection.aggregate([
{ $unwind: "$stores" },
// Drop other stores before expanding threads.
{ $match: { "stores.store_id": "123" } },
{ $unwind: "$stores.messages" },
// Drop other threads before expanding individual messages.
{ $match: { "stores.messages.thread_id": "123" } },
{ $unwind: "$stores.messages.messages" },
{ $match: { "stores.messages.messages.message_id": "111" } },
// Return just the nested message sub-document.
{ $replaceRoot: { newRoot: "$stores.messages.messages" } }
])

Need to sum from array object value in mongodb

I am trying to calculate the total of a value wherever that value exists, but my query is not working 100%. Can somebody help me solve this problem? Here are my sample documents; I have attached two of them. Please look at these documents and suggest the best solution.
Document : 1
{
"_id" : 1,
"message_count" : 4,
"messages" : {
"data" : [
{
"id" : "11",
"saleValue": 1000
},
{
"id" : "112",
"saleValue": 1400
},
{
"id" : "22",
},
{
"id" : "234",
"saleValue": 111
}
],
},
"createdTime" : ISODate("2018-03-18T10:18:48.000Z")
}
Document : 2
{
"_id" : 444,
"message_count" : 4,
"messages" : {
"data" : [
{
"id" : "444",
"saleValue" : 2060
},
{
"id" : "444",
},
{
"id" : 234,
"saleValue" : 260
},
{
"id" : "34534",
}
]
},
"createdTime" : ISODate("2018-03-18T03:11:50.000Z")
}
Needed Output:
{
total : 4831
}
My query :
db.getCollection('myCollection').aggregate([
{
"$group": {
"_id": "$Id",
"totalValue": {
$sum: {
$sum: "$messages.data.saleValue"
}
}
}
}
])
So please if possible help me to solve this problem. Thanks in advance
It's not working correctly because it is aggregating all the documents in the collection; you are grouping on a constant "_id": "tempId", you just need to reference the correct key by adding the $ as:
// Total saleValue per tempId in a single $group stage.
db.getCollection('myCollection').aggregate([
{ "$group": {
// "$tempId" (with the $) refers to the document's tempId field rather than a constant string.
"_id": "$tempId",
"totalValue": {
// Inner $sum adds up the saleValue list of one document; outer $sum accumulates those per-document sums across the group.
"$sum": { "$sum": "$messages.data.saleValue" }
}
} }
])
which in essence is a single stage pipeline version of an aggregate operation with an extra field that holds the sum expression before the group pipeline then calling that field as the $sum operator in the group.
The above works since $sum from MongoDB 3.2+ is available in both the $project and $group stages and when used in the $project stage, $sum returns the sum of the list of expressions. The expression "$messages.data.value" returns a list of numbers [120, 1200] which are then used as the $sum expression:
// Two-stage equivalent: compute a per-document sum first, then total those sums per tempId.
db.getCollection('myCollection').aggregate([
{ "$project": {
// In $project, $sum over the list "$messages.data.value" returns that document's total.
"values": { "$sum": "$messages.data.value" },
// Carry tempId through so the next stage can group on it.
"tempId": 1,
} },
{ "$group": {
"_id": "$tempId",
// Accumulate the per-document totals.
"totalValue": { "$sum": "$values" }
} }
])
You can add a $unwind stage before your $group; that way you deconstruct the data array, and then you can group properly:
// Unwind the data array, then total messages.data.value over all resulting documents.
db.myCollection.aggregate([
{
// One document per element of messages.data.
"$unwind": "$messages.data"
},
{
"$group": {
// A constant string (no $ prefix) places every document in the same group.
"_id": "tempId",
"totalValue": {
$sum: {
// After $unwind this is a single value per document, which the outer $sum accumulates.
$sum: "$messages.data.value"
}
}
}
}
])
Output:
{ "_id" : "tempId", "totalValue" : 1320 }
// Total messages.data.value over every element of every document's data array.
db.getCollection('myCollection').aggregate([
  // A pipeline stage object must contain exactly one stage operator, so
  // $unwind and $group are written as two separate stages; putting both in
  // the same object is rejected by the server.
  {
    // One document per element of messages.data.
    $unwind: "$messages.data"
  },
  {
    $group: {
      // A constant string (no $ prefix) places every document in the same group.
      "_id": "tempId",
      "totalValue": { $sum: "$messages.data.value" }
    }
  }
])
$unwind
Based on the description in the question above, please try executing the following aggregate query as a solution.
// Total saleValue per pageId.
// NOTE(review): the sample documents shown in the question have no pageId field; confirm the intended grouping key.
db.myCollection.aggregate(
// Pipeline
[
// Stage 1: one document per element of messages.data.
{
$unwind: {
path: '$messages.data'
}
},
// Stage 2: group by pageId and add up saleValue ($sum ignores elements without a numeric saleValue).
{
$group: {
_id: {
pageId: '$pageId'
},
total: {
$sum: '$messages.data.saleValue'
}
}
},
// Stage 3: flatten the group key into pageId, keep total, and suppress _id.
{
$project: {
pageId: '$_id.pageId',
total: 1,
_id: 0
}
}
]
);
You can do it without using $group. Grouping means the other fields of the document then have to be managed and carried along separately, so I prefer using $sum and $map as shown below:
// Add a per-document `total` field holding the sum of saleValue over messages.data.
// The original snippet had one closing brace too many, which made it a syntax error.
db.getCollection('myCollection').aggregate([
  {
    // $addFields keeps every existing field and adds `total` to each document.
    $addFields: {
      total: {
        // $sum ignores non-numeric entries, so elements without saleValue add nothing.
        $sum: {
          // Project each element of messages.data to its saleValue.
          $map: {
            input: "$messages.data",
            as: "message",
            in: "$$message.saleValue"
          }
        }
      }
    }
  }
])

Finding all documents which share the same value in an array

Consider I have the following data below:
{
"id":123,
"name":"apple",
"codes":["ABC", "DEF", "EFG"]
}
{
"id":234,
"name":"pineapple",
"codes":["DEF"]
}
{
"id":345,
"name":"banana",
"codes":["HIJ","KLM"]
}
If I didn't want to search by a specific code, is there a way to find all fruits in my mongodb collection which shares the same code?
// Find codes that appear in more than one document and list the names that share each such code.
db.collection.aggregate([
// One document per code.
{ $unwind: '$codes' },
// Per code: count how many documents carry it and collect their names.
{ $group: { _id: '$codes', count: {$sum:1}, fruits: {$push: '$name'}}},
// Keep only codes shared by at least two documents.
{ $match: {'count': {$gt:1}}},
// Fold the remaining codes into one summary document; total is the number of shared codes.
{ $group:{_id:null, total:{$sum:1}, data:{$push:{fruits: '$fruits', code:'$_id'}}}}
])
result:
{ "_id" : null, "total" : 1, "data" : [ { "fruits" : [ "apple", "pineapple" ], "code" : "DEF" } ] }