Mongo aggregation to collect array elements based on field name - mongodb

I have an aggregated mongo document like below. There are two different batches ("-Minor" and "-Major"), and each batch has "batchElements" too.
{
"_id" : "123",
"info" : {
"batch" : "Batch1-Minor"
},
"batchElements" : {
"elements" : [
{ }, { }, .... { }
]
}
},
{
"_id" : "123",
"info" : {
"batch" : "Batch2-Minor"
},
"batchElements" : {
"elements" : [
{ }, { }, .... { }
]
}
},
{
"_id" : "123",
"info" : {
"batch" : "Batch3-Major"
},
"batchElements" : {
"elements" : [
{ }, { }, .... { }
]
}
},
{
"_id" : "123",
"info" : {
"batch" : "Batch4-Major"
},
"batchElements" : {
"elements" : [
{ }, { }, .... { }
]
}
}
How can I collect all "batchElements" of "-Minor" and "-Major" and create a document as below;
Output:
{
"_id" : "123",
"minorElements" : [
[{}, {}, {}, ..... {} ], // elements of "Batch1-Minor"
[{}, {}, {}, ..... {} ], // elements of "Batch2-Minor"
... // elements of "BatchN-Minor"
],
"majorElements" : [
[{}, {}, {}, ..... {} ], // elements of "Batch3-Major"
[{}, {}, {}, ..... {} ], // elements of "Batch4-Major"
... // elements of "BatchN-Major"
]
}

You can start with $split to get the "type" of your batch as part of your $group _id. Then you can run another $group to make minor and major parts of the same document. In the last step you need $replaceRoot along with $arrayToObject to promote both arrays into root level.
// Builds one document per _id with a "minorElements" and a "majorElements" array.
db.collection.aggregate([
// Stage 1: group by (_id, batch type). The type is the lower-cased text after the "-"
// in info.batch, e.g. "Batch1-Minor" -> "minor".
{
$group: {
_id: {
id: "$_id",
type: { $arrayElemAt: [ { $split: [ { $toLower: "$info.batch" }, "-" ] }, 1 ] }
},
// Each pushed value is one batch's whole elements array, so docs is an array of arrays.
docs: { $push: "$batchElements.elements" }
}
},
// Stage 2: regroup by the original _id, emitting one { k, v } pair per batch type
// ("minorElements" / "majorElements") in the shape $arrayToObject expects.
{
$group: {
_id: "$_id.id",
data: { $push: { k: { $concat: ["$_id.type","Elements"] }, v: "$docs" } }
}
},
// Stage 3: turn the { k, v } pairs into real fields and promote them to the root next to _id.
{
$replaceRoot: {
newRoot: {
$mergeObjects: [ { _id: "$_id" }, { $arrayToObject: "$data" } ]
}
}
}
])
Mongo Playground

Related

Query nested array from document

Given the following document data in collection called 'blah'...
[
{
"_id" : ObjectId("60913f55987438922d5f0db6"),
"procedureCode" : "code1",
"description" : "Description 1",
"coding" : [
{
"system" : "ABC",
"code" : "L111"
},
{
"system" : "DEFG",
"code" : "S222"
}
]
},
{
"_id" : ObjectId("60913f55987438922d5f0dbc"),
"procedureCode" : "code2",
"description" : "Description 2",
"coding" : [
{
"system" : "ABC",
"code" : "L999"
},
{
"system" : "DEFG",
"code" : "X3333"
}
]
}
]
What I want to get is all of the coding elements where system is ABC for all parents, and an array of codes like so.
[
{ "code": "L111" },
{ "code": "L999" },
]
If I use db.getCollection('blah').find({"coding.system": "ABC"}) I get every parent document that has any child in the coding array with a system of ABC.
If I use...
db.getCollection("blah")
.find({ "coding.system": "ABC" })
.projection({ "coding.code": 1 })
I do get the parent documents which have a child with a system of "ABC", but the coding for "DEFG" seems to come along for the ride too.
{
"_id" : ObjectId("60913f55987438922d5f0db6"),
"coding" : [
{
"code" : "L111"
},
{
"code" : "S222"
}
]
},
{
"_id" : ObjectId("60913f55987438922d5f0dbc"),
"coding" : [
{
"code" : "L999"
},
{
"code" : "X3333"
}
]
}
I have also tried experimenting with:
db.getCollection("blah").aggregate(
{ $unwind: "$coding" },
{ $match: { "system": "ICD" } }
);
.. as per this page: mongoDB query to find the document in nested array
... but I got nowhere fast with that approach, i.e. no records at all.
What query do I need, please, to achieve something like this..?
[
{ "code": "L111" },
{ "code": "L999" },
...
]
or even better, this..?
[
"L111",
"L999",
...
]
// Returns one { code: <value> } document per coding entry whose system is "ABC".
db.collection.aggregate([
  // Pre-filter: only parents with at least one "ABC" entry need to be unwound.
  {
    $match: { "coding.system": "ABC" }
  },
  // One document per coding entry.
  {
    $unwind: "$coding"
  },
  // Drop the non-"ABC" entries that came along with each matching parent.
  {
    $match: { "coding.system": "ABC" }
  },
  // _id is included by default; exclude it so each result is exactly { code: ... }.
  {
    $project: { _id: 0, code: "$coding.code" }
  }
])
mongoplayground
// Collects the code of every "ABC" coding entry, across all documents, into one array.
db.collection.aggregate([
// Pre-filter: only parents with at least one "ABC" entry need to be unwound.
{
$match: { "coding.system": "ABC" }
},
// One document per coding entry.
{
$unwind: "$coding"
},
// Drop the non-"ABC" entries that came along with each matching parent.
{
$match: { "coding.system": "ABC" }
},
// _id: null puts every remaining entry into a single group; coding is an array of code values.
{
$group: {
_id: null,
coding: { $push: "$coding.code" }
}
}
])
mongoplayground
Instead of $unwind, $match you can also use $filter:
// Keeps one result per parent document, with coding reduced to its "ABC" entries.
db.collection.aggregate([
// Only parents with at least one "ABC" entry.
{ $match: { "coding.system": "ABC" } },
{
$project: {
coding: {
// $$this is each element of the coding array; entries with another system are dropped.
// The kept entries are whole sub-documents (system and code), not bare code values.
$filter: {
input: "$coding",
cond: { $eq: [ "$$this.system", "ABC" ] }
}
}
}
}
])

Mongodb aggregate to add field using map, filter, reduce

Update: I think i was not clear in my description (very sorry) so I made an update to the question to be more clear for what I need
I have the following data in MongoDB
{
"category": "Threats",
"_id": ObjectId("5e13a29353ff464eb389c385"),
"Relations" : [
{
"Threats" : ObjectId("5e13a29353ff464eb389c385"),
"Breaches" : ObjectId("5e13a29353ff464eb389c04a")
},
{
"Threats" : ObjectId("5e13a29353ff464eb389c385"),
"Stories" : ObjectId("5e13a29453ff464eb389c79b")
},
{
"Threats" : ObjectId("5e13a29353ff464eb389c385"),
"Videos" : ObjectId("5e13a2ca53ff464eb389d3ef")
},
{
"Threats" : ObjectId("5e13a29353ff464eb389c385"),
"Actors" : ObjectId("5e13a2ca53ff464eb389d59f")
},
{
"Threats" : ObjectId("5e13a29353ff464eb389c385"),
"Companies" : ObjectId("5e13a2c953ff464eb389cfa0")
},
{
"Threats" : ObjectId("5e13a29353ff464eb389c385"),
"Stories" : ObjectId("5e13a2ca53ff464eb389d5b3")
}
]
}
and I want to know what is the most efficient way to add another field (finalArray) that includes all the ObjectId that is not Threats like so :
Update: I want the finalArray field to have all the ids that are not equal to the item id. I hope that makes sense, and I was clear enough,
{
"finalArray" : [
ObjectId("5e13a29353ff464eb389c04a"),
ObjectId("5e13a29453ff464eb389c79b"),
ObjectId("5e13a29353ff464eb389c385"),
ObjectId("5e13a2ca53ff464eb389d3ef"),
ObjectId("5e13a2ca53ff464eb389d59f"),
ObjectId("5e13a2c953ff464eb389cfa0"),
ObjectId("5e13a2ca53ff464eb389d5b3")
]
}
I was trying to do it for a long time :(, and I did it but I have used a lot of pipe stages to make it and i feel it is not efficient at all, so I appreciate any help in this as there is a lot of tasks in my project that depends on this. thanks in advance.
Maybe not the shortest aggregation but it works:
// Adds finalArray (every related id except the "Threats" one) while keeping Relations.
db.col.aggregate([
{
// $facet runs both sub-pipelines over the same input documents and returns a single
// document holding one array per facet.
$facet:
{
// Empty $match: passes every input document through unchanged.
Relations: [{ $match: {} }],
finalArray: [
// One document per relation entry.
{ $unwind: "$Relations" },
// Remove the "Threats" key so only the other id remains in each entry.
{ $unset: "Relations.Threats" },
// Convert the remaining key/value into [{ k, v }] so the value can be read without knowing the key.
{ $set: { Relations: { $objectToArray: "$Relations" } } },
{ $unwind: "$Relations" },
// Re-assemble the ids per original document.
{
$group: {
_id: "$_id",
finalArray: { $push: "$Relations.v" },
}
},
]
}
},
// Unwinding both facet arrays yields every (document, finalArray) combination ...
{ $unwind: "$Relations" },
{ $unwind: "$finalArray" },
// ... and this keeps only the combinations that belong to the same _id.
{ $match: { $expr: { $eq: ["$Relations._id", "$finalArray._id"] } } },
// Flatten the paired facet entries back into one document.
{
$project: {
_id: "$finalArray._id",
Relations: "$Relations.Relations",
finalArray: "$finalArray.finalArray",
}
}
])
Mongo Playground
[
{
"Relations": [
{
"Breaches": ObjectId("5e13a29353ff464eb389c04a"),
"Threats": ObjectId("5e13a29353ff464eb389c385")
},
{
"Stories": ObjectId("5e13a29453ff464eb389c79b"),
"Threats": ObjectId("5e13a29353ff464eb389c385")
},
{
"Threats": ObjectId("5e13a29353ff464eb389c385"),
"Videos": ObjectId("5e13a2ca53ff464eb389d3ef")
},
{
"Actors": ObjectId("5e13a2ca53ff464eb389d59f"),
"Threats": ObjectId("5e13a29353ff464eb389c385")
},
{
"Companies": ObjectId("5e13a2c953ff464eb389cfa0"),
"Threats": ObjectId("5e13a29353ff464eb389c385")
},
{
"Stories": ObjectId("5e13a2ca53ff464eb389d5b3"),
"Threats": ObjectId("5e13a29353ff464eb389c385")
}
],
"_id": ObjectId("5a934e000102030405000000"),
"finalArray": [
ObjectId("5e13a29353ff464eb389c04a"),
ObjectId("5e13a29453ff464eb389c79b"),
ObjectId("5e13a2ca53ff464eb389d3ef"),
ObjectId("5e13a2ca53ff464eb389d59f"),
ObjectId("5e13a2c953ff464eb389cfa0"),
ObjectId("5e13a2ca53ff464eb389d5b3")
]
}
]
Another solution, which I like much more:
// Adds finalArray: the ids stored under every key except "Threats", in Relations order.
// The entries are flattened one by one instead of being merged into a single object,
// because merging collapses repeated keys (the two "Stories" ids would become one).
db.col.aggregate([
  {
    $set: {
      finalArray: {
        $reduce: {
          input: "$Relations",
          initialValue: [],
          in: {
            $concatArrays: [
              "$$value",
              {
                // Values of the current relation entry, minus the "Threats" key.
                $map: {
                  input: {
                    $filter: {
                      // $$this here is the current Relations entry of the $reduce.
                      input: { $objectToArray: "$$this" },
                      as: "pair",
                      cond: { $ne: ["$$pair.k", "Threats"] }
                    }
                  },
                  as: "pair",
                  in: "$$pair.v"
                }
              }
            ]
          }
        }
      }
    }
  }
])
If you only need the finalArray then use:
// Returns only _id and finalArray: the ids stored under every key except "Threats",
// in Relations order. The entries are flattened one by one instead of being merged into
// a single object, because merging collapses repeated keys (the two "Stories" ids would
// become one).
db.col.aggregate([
  {
    $project: {
      finalArray: {
        $reduce: {
          input: "$Relations",
          initialValue: [],
          in: {
            $concatArrays: [
              "$$value",
              {
                // Values of the current relation entry, minus the "Threats" key.
                $map: {
                  input: {
                    $filter: {
                      // $$this here is the current Relations entry of the $reduce.
                      input: { $objectToArray: "$$this" },
                      as: "pair",
                      cond: { $ne: ["$$pair.k", "Threats"] }
                    }
                  },
                  as: "pair",
                  in: "$$pair.v"
                }
              }
            ]
          }
        }
      }
    }
  }
])
If the array only contains a fixed set of keys and you know all of them,
then you can use this query.
// Adds finalArray by concatenating the values of a fixed, known list of keys.
db.collection.aggregate([ {
$addFields: {
// Each "$Relations.<key>" path resolves to an array of that key's values taken from the
// Relations entries that have it. "Threats" is simply not listed, so its ids are left out.
// The result is ordered by key (in the order listed here), not by position in Relations.
finalArray: { $concatArrays : [
"$Relations.Breaches",
"$Relations.Stories",
"$Relations.Companies",
"$Relations.Actors",
"$Relations.Videos"]}
}
}])
The Result of the Above Query
{
"_id" : ObjectId("5e3bd0a17f1e40317b8c4377"),
"Relations" : [
{
"Threats" : ObjectId("5e13a29353ff464eb389c385"),
"Breaches" : ObjectId("5e13a29353ff464eb389c04a")
},
{
"Threats" : ObjectId("5e13a29353ff464eb389c385"),
"Stories" : ObjectId("5e13a29453ff464eb389c79b")
},
{
"Threats" : ObjectId("5e13a29353ff464eb389c385"),
"Videos" : ObjectId("5e13a2ca53ff464eb389d3ef")
},
{
"Threats" : ObjectId("5e13a29353ff464eb389c385"),
"Actors" : ObjectId("5e13a2ca53ff464eb389d59f")
},
{
"Threats" : ObjectId("5e13a29353ff464eb389c385"),
"Companies" : ObjectId("5e13a2c953ff464eb389cfa0")
},
{
"Threats" : ObjectId("5e13a29353ff464eb389c385"),
"Stories" : ObjectId("5e13a2ca53ff464eb389d5b3")
}
],
"finalArray" : [
ObjectId("5e13a29353ff464eb389c04a"),
ObjectId("5e13a29453ff464eb389c79b"),
ObjectId("5e13a2ca53ff464eb389d5b3"),
ObjectId("5e13a2c953ff464eb389cfa0"),
ObjectId("5e13a2ca53ff464eb389d59f"),
ObjectId("5e13a2ca53ff464eb389d3ef")
]
}

Filter keys not in collection

How do we find keys which do not exist in collection.
Given an input list of keys ['3321', '2121', '5647'] , i want to return those that do not exist in the collection :
{ "_id" : { "$oid" : "5e2993b61886a22f400ea319" }, "scrip" : "5647" }
{ "_id" : { "$oid" : "5e2993b61886a22f400ea31a" }, "scrip" : "3553" }
So the expected output is ['3321', '2121']
This aggregation gets the desired output (works with MongoDB version 3.4 or later):
// Keys to look up; the pipeline reports which of them have no document in the collection.
INPUT_ARRAY = ['3321', '2121', '5647']
db.test.aggregate( [
// Keep only the documents whose scrip is one of the requested keys.
{
$match: {
scrip: {
$in: INPUT_ARRAY
}
}
},
// _id: null collapses everything into one group; matches lists the keys that do exist.
// NOTE(review): if no document matches at all, $group emits nothing and the aggregation
// returns no result rather than the full INPUT_ARRAY -- handle that case in the caller.
{
$group: {
_id: null,
matches: { $push: "$scrip" }
}
},
// Requested keys minus existing keys = keys missing from the collection.
{
$project: {
scrips_not_exist: { $setDifference: [ INPUT_ARRAY, "$matches" ] },
_id: 0
}
}
] )
The output:
{ "scrips_not_exist" : [ "3321", "2121" ] }

Query datevalue of a inner Array element

Need help with some MongoDB query:
The document I have is below, and I am trying to search based on 2 conditions:
meta.tags.code = "ABC"
Its LastSyncDateTime (the meta.extension.value of the "LastSyncDateTime" entry) should satisfy either
meta.extension.value == "" (OR)
meta.extension.value is less than meta.lastUpdate
Data :
{
"meta" : {
"extension" : [
{
"url" : "LastSyncDateTime",
"value" : "20190206-00:49:25.694"
},
{
"url" : "RetryCount",
"value" : "0"
}
],
"lastUpdate" : "20190207-01:21:41.095",
"tags" : [
{
"code" : "ABC",
"system" : "type"
},
{
"code" : "XYZ",
"system" : "SourceSystem"
}
]
}
}
Query:
db.proc_patients_service.find({
"meta.tags.code": "ABC",
$or: [{
"meta.extension.value": ""
}, {
$expr: { "$lt": [{ "mgfunc": "ISODate", "params": [{ "$arrayElemAt": ["$meta.extension.value", 0] }] }, { "mgfunc": "ISODate", "params": ["$meta.lastUpdate"] }] }
}]
})
But it is only fetching ABC Patients whose LastSyncDateTime is empty and ignores the other condition.
Using MongoDB Aggregation, I have converted your string to date with operator $dateFromString and then compare the value as per your criteria.
// Returns the "ABC" patients whose LastSyncDateTime is empty or earlier than meta.lastUpdate.
// Matching documents are returned unchanged (all extension entries and the original strings).
db.proc_patients_service.aggregate([
  // Condition 1: the patient carries the "ABC" tag.
  { $match: { "meta.tags.code": "ABC" } },
  // Pick the value of the "LastSyncDateTime" extension only, so that other entries
  // (e.g. "RetryCount" with value "0") never take part in the date comparison.
  {
    $addFields: {
      lastSync: {
        $let: {
          vars: {
            ext: {
              $arrayElemAt: [
                {
                  $filter: {
                    input: "$meta.extension",
                    as: "e",
                    cond: { $eq: ["$$e.url", "LastSyncDateTime"] }
                  }
                },
                0
              ]
            }
          },
          in: "$$ext.value"
        }
      }
    }
  },
  // Condition 2: empty sync time, OR sync time earlier than the last update.
  // $cond keeps the empty string away from $dateFromString, which would raise on "".
  // $dateFromString returns null for a missing dateString and null sorts before any date,
  // so documents without a LastSyncDateTime entry are returned as well.
  {
    $match: {
      $expr: {
        $cond: [
          { $eq: ["$lastSync", ""] },
          true,
          {
            $lt: [
              { $dateFromString: { dateString: "$lastSync", format: "%Y%m%d-%H:%M:%S.%L" } },
              { $dateFromString: { dateString: "$meta.lastUpdate", format: "%Y%m%d-%H:%M:%S.%L" } }
            ]
          }
        ]
      }
    }
  },
  // Remove the helper field so the documents come back in their stored shape.
  { $project: { lastSync: 0 } }
])

MongoDB nested object aggregation sum and sort

I have a highly nested set of MongoDB objects, and I want to sort the subdocuments by the sum of their votes, for example:
{
"_id":17846384es,
"company_name":"company1",
"products":[
{
"product_id":"123785",
"product_name":"product1",
"user_votes":[
{
"user_id":1,
"vote":1
},
{
"user_id":2,
"vote":2
}
]
},
{
"product_id":"98765",
"product_name":"product2",
"user_votes":[
{
"user_id":5,
"vote":3
},
{
"user_id":3,
"vote":3
}
]
}
]
}
I want to sort the products in descending order by the sum of their votes.
the expected output is
{
"_id":17846384es,
"company_name":"company1",
"products":[
{
"product_id":"98765",
"product_name":"product2",
"user_votes":[
{
"user_id":5,
"vote":3
},
{
"user_id":3,
"vote":3
}
],
"votes":6
},
{
"product_id":"123785",
"product_name":"product1",
"user_votes":[
{
"user_id":1,
"vote":1
},
{
"user_id":2,
"vote":2
}
],
"votes":3
}
]
}
Any Idea ?
The following pipeline
// Re-orders each company's products by the sum of their user votes, highest first.
db.products.aggregate([
// One document per product.
{ $unwind: "$products" },
{
$project: {
company_name: 1,
products: 1,
// "$products.user_votes.vote" resolves to the array of vote values of this product.
totalVotes: {
$sum: "$products.user_votes.vote"
}
}
},
{ $sort: { totalVotes: -1 } },
// Rebuild one document per company. $push appends products in the order the documents
// arrive, which is presumably the sorted order from the previous stage -- verify on your version.
{
$group: {
_id: "$_id",
company_name: { $first: "$company_name" },
products: { $push: "$products" }
}
}
])
would output
{
"_id" : "17846384es",
"company_name" : "company1",
"products" : [
{
"product_id" : "98765",
"product_name" : "product2",
"user_votes" : [
{
"user_id" : 5,
"vote" : 3
},
{
"user_id" : 3,
"vote" : 3
}
]
},
{
"product_id" : "123785",
"product_name" : "product1",
"user_votes" : [
{
"user_id" : 1,
"vote" : 1
},
{
"user_id" : 2,
"vote" : 2
}
]
}
]
}
In case you want to keep the sum of the votes at the product level as shown in your expected output simply modify the $project stage as follows
...
{
$project: {
company_name: 1,
products: {
product_id: 1,
product_name: 1,
user_votes: 1,
votes: { $sum: "$products.user_votes.vote" }
}
}
}
...