Retrieve matched document from nested array [duplicate] - mongodb

This question already has answers here:
Find in Double Nested Array MongoDB
(2 answers)
Closed 4 years ago.
I'm trying to collect all objects in a nested array where the field spec equals unknown.
The structure per document is similar to this:
{
"_id" :"5b1e73786f11e421956023c3",
"subs" : [
{
"name" : "subrepo1",
"files" : [
{
"name" : ".....",
"spec" : "Unknown"
},
{
"name" : ".....",
"spec" : "Unknown"
}
]
},
{
"name" : "subrepo2",
"files" : [
{
"name" : "file2",
"spec" : "Unknown"
},
{
"name" : ".....",
"spec" : "1234"
}
]
}
]
}
I tried the following but it doesn't work. I think this is in the right direction, but I'm probably missing something important.
db.col.aggregate([
{$match: {'subs.files.spec': 'Unknown'}},
{$project: {
'subs.files': {$filter: {
input: '$subs.files',
//as: 'subs.files',
cond: {$eq: ['this.spec', 'FunSuite']}
}},
//_id: 0
}}
])
The expected output would be the following (ONLY the files whose spec equals Unknown, NOT the other ones):
{
"_id" : "5b1e73786f11e421956023c3",
"subs" : [
{
"name" : "subrepo1",
"files" : [
{
"name" : ".....",
"spec" : "Unknown"
},
{
"name" : ".....",
"spec" : "Unknown"
}
]
},
{
"name" : "subrepo2",
"files" : [
{
"name" : "file2",
"spec" : "Unknown"
}
]
}
]
}

You need to use the $filter aggregation operator, which returns only the matching elements of the array and excludes the other elements
// Rebuild each document so that every entry of `subs` keeps only the files
// whose spec is "Unknown".
db.collection.aggregate([
  // Emit one document per element of `subs` so its `files` array can be filtered.
  { $unwind: '$subs' },

  // Carry the sub-repository name over and keep only the matching files.
  {
    $project: {
      'subs.name': '$subs.name',
      'subs.files': {
        $filter: {
          input: '$subs.files',
          as: 'file',
          cond: { $eq: ['$$file.spec', 'Unknown'] }
        }
      }
    }
  },

  // Collect the filtered sub-repositories back into one `subs` array per original _id.
  { $group: { _id: '$_id', subs: { $push: '$subs' } } }
])
Above will give following output
[
{
"_id": ObjectId("5a934e000102030405000000"),
"subs": [
{
"files": [
{
"name": ".....",
"spec": "Unknown"
},
{
"name": ".....",
"spec": "Unknown"
}
],
"name": "subrepo1"
},
{
"files": [
{
"name": "file2",
"spec": "Unknown"
}
],
"name": "subrepo2"
}
]
}
]
You can check the result here
And if you want to get the matching files as a flat array instead, replace the $group stage with $unwind and $replaceRoot stages, as in the following pipeline
// Return every file whose spec is "Unknown" as its own top-level document.
db.collection.aggregate([
  // Emit one document per element of `subs`.
  { $unwind: '$subs' },

  // Keep only the matching files of each sub-repository.
  {
    $project: {
      'subs.name': '$subs.name',
      'subs.files': {
        $filter: {
          input: '$subs.files',
          as: 'file',
          cond: { $eq: ['$$file.spec', 'Unknown'] }
        }
      }
    }
  },

  // Emit one document per remaining file ...
  { $unwind: '$subs.files' },

  // ... and promote that file to be the whole output document.
  { $replaceRoot: { newRoot: '$subs.files' } }
])
Above will give following output
[
{
"name": ".....",
"spec": "Unknown"
},
{
"name": ".....",
"spec": "Unknown"
},
{
"name": "file2",
"spec": "Unknown"
}
]

Try this way:
// Flatten both array levels, then keep only the rows whose file spec is "Unknown".
db.col.aggregate([
  { $unwind: "$subs" },        // one document per sub-repository
  { $unwind: "$subs.files" },  // one document per file
  { $match: { "subs.files.spec": "Unknown" } }
]);

Related

Query nested array from document

Given the following document data in collection called 'blah'...
[
{
"_id" : ObjectId("60913f55987438922d5f0db6"),
"procedureCode" : "code1",
"description" : "Description 1",
"coding" : [
{
"system" : "ABC",
"code" : "L111"
},
{
"system" : "DEFG",
"code" : "S222"
}
]
},
{
"_id" : ObjectId("60913f55987438922d5f0dbc"),
"procedureCode" : "code2",
"description" : "Description 2",
"coding" : [
{
"system" : "ABC",
"code" : "L999"
},
{
"system" : "DEFG",
"code" : "X3333"
}
]
}
]
What I want to get is all of the coding elements where system is ABC for all parents, and an array of codes like so.
[
{ "code": "L111" },
{ "code": "L999" },
]
If I use db.getCollection('blah').find({"coding.system": "ABC"}) I get every parent document that has any child in its coding array with a system of ABC.
If I use...
db.getCollection("blah")
.find({ "coding.system": "ABC" })
.projection({ "coding.code": 1 })
I do get the parent documents which have a child with a system of "ABC", but the coding for "DEFG" seems to come along for the ride too.
{
"_id" : ObjectId("60913f55987438922d5f0db6"),
"coding" : [
{
"code" : "L989"
},
{
"code" : "S102"
}
]
},
{
"_id" : ObjectId("60913f55987438922d5f0dbc"),
"coding" : [
{
"code" : "L989"
},
{
"code" : "X382"
}
]
}
I have also tried experimenting with:
db.getCollection("blah").aggregate(
{ $unwind: "$coding" },
{ $match: { "system": "ICD" } }
);
.. as per this page: mongoDB query to find the document in nested array
... but got nowhere fast with that approach, i.e. no records at all.
What query do I need, please, to achieve something like this..?
[
{ "code": "L111" },
{ "code": "L999" },
...
]
or even better, this..?
[
"L111",
"L999",
...
]
// List the code of every `coding` entry whose system is "ABC", one output document per entry.
db.collection.aggregate([
  // Skip documents that have no ABC entry at all.
  { $match: { 'coding.system': 'ABC' } },
  // One document per `coding` entry.
  { $unwind: '$coding' },
  // Drop the entries that belong to other systems.
  { $match: { 'coding.system': 'ABC' } },
  // Expose the code as a top-level `code` field.
  { $project: { code: '$coding.code' } }
])
mongoplayground
// Collect the codes of all "ABC" coding entries into a single array.
db.collection.aggregate([
  // Skip documents that have no ABC entry at all.
  { $match: { 'coding.system': 'ABC' } },
  // One document per `coding` entry.
  { $unwind: '$coding' },
  // Drop the entries that belong to other systems.
  { $match: { 'coding.system': 'ABC' } },
  // A null _id puts every remaining row into one group; push each code into `coding`.
  {
    $group: {
      _id: null,
      coding: { $push: '$coding.code' }
    }
  }
])
mongoplayground
Instead of $unwind, $match you can also use $filter:
// Keep each matching document, but trim its `coding` array down to the "ABC" entries.
db.collection.aggregate([
  {
    $match: { 'coding.system': 'ABC' }
  },
  {
    $project: {
      coding: {
        // No `as` is given, so each array element is referenced as $$this.
        $filter: {
          input: '$coding',
          cond: { $eq: ['$$this.system', 'ABC'] }
        }
      }
    }
  }
])

MongoDB: How to make two unions to the same collection with matrices

I have user records that contain the user's own posts and references to the posts shared with them; that is, users can share posts with other users. I need to get only the posts shared with a given user, using the id of the user who shared the post and the id of the post as references.
When I use only the user id as a reference, it works, but when I try to combine it with the id of the post, it returns nothing; the same happens when I try to use only the id of the post to get the shared posts.
This would be the structure of the records
{
"_id" : ObjectId("5cd573b2bb9ad84f9bba2f74"),
"name" : "name 4",
"posts" : [
{
"_id" : ObjectId("5cd573b2bb9ad84f9bba2f72"),
"name" : "post 1"
},
{
"_id" : ObjectId("5cd573b2bb9ad84f9bba2f73"),
"name" : "post 2"
}
],
"postSharedWithMe" : [
{
"user_id" : "5cd4aaedfcf8d8583cf97494",
"post_id" : "5cd4aaedfcf8d8583cf97492"
},
{
"user_id" : "5cd4aaedfcf8d8583cf97494",
"post_id" : "5cd4aaedfcf8d8583cf97493"
}
]
}
and this is how I tried to query them
db.users.aggregate([
{ "$match": { "_id": ObjectId("5cd573b2bb9ad84f9bba2f74") }},
{ $unwind:"$postSharedWithMe" },
{ $unwind:"$posts" },
{
$lookup:
{
from: "users",
let: {
user_id: { "$toObjectId": "$postSharedWithMe.user_id"},
post_id : { "$toObjectId": "$postSharedWithMe.post_id"}
},
pipeline: [
{ $match:
{ $expr:
{ $and:
[
{ $eq: [ "$_id", "$$user_id" ] },
{ $eq: [ "$posts._id", "$$post_id" ] }
]
}
}
},
],
as: "sharedPosts"
}
},
{ $unwind:"$sharedPosts" },
{ "$group": {
"_id": "$_id",
"sharedPosts": { "$push": "$sharedPosts" }
}
}
])
and this is the result:
Fetched 0 record(s) in 0ms
and this is what I expected
{
"_id" : ObjectId("5cd573b2bb9ad84f9bba2f74"),
"name" : "username",
"posts" : [
{
"_id" : ObjectId("5cd573b2bb9ad84f9bba2f72"),
"name" : "post 1"
},
{
"_id" : ObjectId("5cd573b2bb9ad84f9bba2f73"),
"name" : "post 2"
}
],
"sharedPosts" : [
{
"_id" : ObjectId("id"),
"name" : "shared post"
},
{
"_id" : ObjectId("id"),
"name" : "shared post"
}
]
}
Apparently I needed to go through all the posts first while referring to the shared posts; the result of this was an array, so I just needed to $unwind it and compare with $eq, and it worked!
// For one user, resolve every entry of `postSharedWithMe` to the actual post
// stored in the sharing user's `posts` array.
db.users.aggregate([
  { $match: { _id: ObjectId("5cd573b2bb9ad84f9bba2f74") } },

  // One document per shared-post reference.
  { $unwind: "$postSharedWithMe" },

  {
    $lookup: {
      from: "users",
      // Convert the referenced ids with $toObjectId so they can be compared
      // with the _id values inside the sub-pipeline.
      let: {
        user_id: { $toObjectId: "$postSharedWithMe.user_id" },
        post_id: { $toObjectId: "$postSharedWithMe.post_id" }
      },
      pipeline: [
        // The sharing user, provided the referenced post is among that user's posts.
        {
          $match: {
            $expr: {
              $and: [
                { $eq: ["$$user_id", "$_id"] },
                { $in: ["$$post_id", "$posts._id"] }
              ]
            }
          }
        },
        // One document per post, then keep only the referenced post.
        { $unwind: "$posts" },
        { $match: { $expr: { $eq: ["$posts._id", "$$post_id"] } } }
      ],
      as: "sharedPosts"
    }
  },

  { $unwind: "$sharedPosts" },

  // Reassemble a single document per user with the resolved posts collected in `sharedPosts`.
  {
    $group: {
      _id: "$_id",
      name: { $first: "$name" },
      posts: { $first: "$posts" },
      sharedPosts: { $push: "$sharedPosts.posts" }
    }
  }
])

Combine results based on condition during group by

Mongo query generated out of java code:
{
"pipeline": [{
"$match": {
"Id": "09cd9a5a-85c5-4948-808b-20a52d92381a"
}
},
{
"$group": {
"_id": "$result",
"id": {
"$first": "$result"
},
"labelKey": {
"$first": {
"$ifNull": ["$result",
"$result"]
}
},
"value": {
"$sum": 1
}
}
}]
}
Field 'result' can have values like Approved, Rejected, null and "" (empty string). What I am trying to achieve is combining the count of both null and empty together.
So that the empty string Id will have the count of both null and "", which is equal to 4
I'm sure there's a more "proper" way, but this is what I could quickly come up with:
// Count documents per `result`, then give the null and "" buckets the same
// combined count.
// NOTE: `collection` is a placeholder; the original snippet closed a call with
// `);` but never showed the opening, so substitute your own collection name.
db.collection.aggregate([
  // Stage 1: one bucket per distinct `result` value with its document count.
  {
    "$group": {
      "_id": "$result",
      "id": { "$first": "$result" },
      "labelKey": {
        "$first": { "$ifNull": ["$result", "$result"] }
      },
      "value": { "$sum": 1 }
    }
  },
  // Stage 2: regroup the buckets. "Approved" and "Rejected" keep their own key;
  // every other bucket is grouped under "" so the counts are summed together.
  {
    "$group": {
      "_id": {
        // $cond takes exactly [ <condition>, <then>, <else> ].
        "$cond": [
          {
            "$or": [
              { "$eq": ["$_id", "Approved"] },
              { "$eq": ["$_id", "Rejected"] }
            ]
          },
          "$_id",
          ""
        ]
      },
      // Remember the original buckets so they can be restored below.
      "temp": {
        "$push": {
          "_id": "$_id",
          "labelKey": "$labelKey"
        }
      },
      "count": { "$sum": "$value" }
    }
  },
  // Stage 3: back to one document per original bucket ...
  { "$unwind": "$temp" },
  // ... each carrying the combined count of the group it was merged into.
  {
    "$project": {
      "_id": "$temp._id",
      "labelKey": "$temp.labelKey",
      "count": "$count"
    }
  }
]);
Because the second group runs on 4 documents at most, I don't feel too bad about doing this.
I have used $facet.
The MongoDB stage $facet lets you run several independent pipelines within the stage of a pipeline, all using the same data. This means that you can run several aggregations with the same preliminary stages, and successive stages.
// Pipeline that counts documents per `result`, with null and "" counted together.
var queries = [
  // Restrict the aggregation to a single Id.
  { $match: { Id: "09cd9a5a-85c5-4948-808b-20a52d92381a" } },

  // Run two sub-pipelines over the same matched documents.
  {
    $facet: {
      // Documents whose result is "" or null, counted as one bucket.
      empty: [
        { $match: { result: { $in: ["", null] } } },
        { $group: { _id: null, value: { $sum: 1 } } }
      ],
      // All remaining documents, counted per distinct result value.
      non_empty: [
        { $match: { result: { $nin: ["", null] } } },
        { $group: { _id: "$result", value: { $sum: 1 } } }
      ]
    }
  },

  // Merge both facet outputs into a single `results` array.
  {
    $project: {
      results: { $concatArrays: ["$empty", "$non_empty"] }
    }
  }
];
Output :
{
"results": [{
"_id": null,
"value": 52 // count of both '' and null.
}, {
"_id": "Approved",
"value": 83
}, {
"_id": "Rejected",
"value": 3661
}]
}
Changing the group by like below solved the problem
{
"$group": {
"_id": {
"$ifNull": ["$result", ""]
},
"id": {
"$first": "$result"
},
"labelKey": {
"$first": {
"$ifNull": ["$result",
"$result"]
}
},
"value": {
"$sum": 1
}
}
}

$elemMatch against two Array elements if one fails

A bit odd but this is what I am looking for.
I have an array as follow:
Document 1:
Items: [
{
"ZipCode": "11111",
"ZipCode4" "1234"
}
Document 2:
Items: [
{
"ZipCode": "11111",
"ZipCode4" "0000"
}
I would like to use a single query and send a filter on ZipCode = 11111 && ZipCode4 = 4321; if this fails, the query should look for ZipCode = 11111 && ZipCode4 = 0000
Is there a way to do this in a single query ? or do I need to make 2 calls to my database ?
To match both data sets, (11111/4321) and (11111/0000), you can use $or and $and with $elemMatch like the following:
// Find documents that have an Items element matching either (11111/4321) or
// (11111/0000).
// Both conditions of a pair sit inside ONE $elemMatch so that they must hold
// for the same array element. With a separate $elemMatch per field, ZipCode
// could match one element and ZipCode4 a different one.
db.test.find({
  $or: [
    {
      "Items": {
        $elemMatch: {
          $and: [{ "ZipCode": "11111" }, { "ZipCode4": "4321" }]
        }
      }
    },
    {
      "Items": {
        $elemMatch: {
          $and: [{ "ZipCode": "11111" }, { "ZipCode4": "0000" }]
        }
      }
    }
  ]
})
As you want conditional staging, this is not possible but we can get closer to it like this :
// Select the candidate documents and bucket them by whether an item matched
// the preferred pair (11111/4321).
db.test.aggregate([
  // Candidates for either pair. Listing two fields in one query object is an
  // implicit AND of the two conditions.
  {
    $match: {
      $or: [
        { "Items.ZipCode": "11111", "Items.ZipCode4": "4321" },
        { "Items.ZipCode": "11111", "Items.ZipCode4": "0000" }
      ]
    }
  },

  // For every item, compute true when it is the preferred pair, false otherwise.
  {
    $project: {
      Items: 1,
      match: {
        $map: {
          input: "$Items",
          as: "val",
          in: {
            $cond: [
              {
                $and: [
                  { $eq: ["$$val.ZipCode", "11111"] },
                  { $eq: ["$$val.ZipCode4", "4321"] }
                ]
              },
              true,
              false
            ]
          }
        }
      }
    }
  },

  // One document per computed flag.
  { $unwind: "$match" },

  // Bucket the documents under _id: true / _id: false.
  {
    $group: {
      _id: "$match",
      data: { $push: { _id: "$_id", Items: "$Items" } }
    }
  }
])
The first $match selects only the documents we need.
The $project builds a new field that checks whether each item is from the 1st set of data (11111/4321) or the 2nd set of data (11111/0000).
The $unwind is used to remove the array generated by $map.
The $group groups the documents by set of data.
So in the end you will have an output like the following :
{ "_id" : true, "data" : [ { "_id" : ObjectId("58af69ac594b51730a394972"), "Items" : [ { "ZipCode" : "11111", "ZipCode4" : "4321" } ] }, { "_id" : ObjectId("58af69ac594b51730a394974"), "Items" : [ { "ZipCode" : "11111", "ZipCode4" : "4321" } ] } ] }
{ "_id" : false, "data" : [ { "_id" : ObjectId("58af69ac594b51730a394971"), "Items" : [ { "ZipCode" : "11111", "ZipCode4" : "0000" } ] } ] }
Your application logic can check whether there is an entry with _id:true in this output array and, if so, just take the corresponding data field for _id:true. If there is an entry with _id:false, take the corresponding data field for _id:false.
In the last $group, you can also use $addToSet to build 2 fields, data1 & data2, for the two types of data set, but this will be painful to use as it will add a null object to the array for each document of the opposite type:
"$addToSet": {
"$cond": [
{ "$eq": ["$_id", true] },
"$data",
null
]
}
Here is a gist

MongoDB query with conditional group by statement

I need to export customer records from a MongoDB database. Exported customer records should not contain duplicates. "firstName+lastName+code" is the key used to de-duplicate the records, and if there are two records in the database with the same key, I need to give preference to the one whose source field has a value other than email.
The collection is customer (id, firstName, lastName, code, source).
If there are 3 records with the same unique key and 3 different sources, then I need to choose only one record from the 2 non-email sources (TV, internet) (or, if there are n sources, still only one record), never the 'email' one (email should be chosen only when a single record is present with that unique key and its source is email).
query using:
db.customer.aggregate([
{
"$match": {
"active": true,
"dealerCode": { "$in": ["111391"] },
"source": { "$in": ["email", "TV", "internet"] }
}
},
{
$group: {
"_id": {
"firstName": "$personalInfo.firstName",
"lastName": "$personalInfo.lastName",
"code": "$vehicle.code"
},
"source": {
$addToSet: { "source": "$source" }
}
}
},
{
$redact:
{
$cond: [
{ $eq: [{ $ifNull: ["$source", "other"] }, "email"] },
"$$PRUNE",
"$$DESCEND"
]
}
},
{
$project:
{
"source":
{
$map:
{
"input": {
$cond: [
{ $eq: [{ $size: "$source" }, 0] },
[{ "source": "email" }],
"$source"
]
},
"as": "inp",
"in": "$$inp.source"
}
},
"record": { "_id": 1 }
}
}
])
sample output:
{ "_id" : { "firstName" : "sGI6YaJ36WRfI4xuJQzI7A==", "lastName" : "99eQ7i+uTOqO8X+IPW+NOA==", "code" : "1GTHK23688F113955" }, "source" : ["internet"] }
{ "_id" : { "firstName" : "WYDROTF/9vs9O7XhdIKd5Q==", "lastName" : "BM18Uq/ltcbdx0UJOXh7Sw==", "code" : "1G4GE5GV5AF180133" }, "source" : ["internet"] }
{ "_id" : { "firstName" : "id+U2gYNHQaNQRWXpe34MA==", "lastName" : "AIs1G33QnH9RB0nupJEvjw==", "code" : "1G4GE5EV0AF177966" }, "source" : ["internet"] }
{ "_id" : { "firstName" : "qhreJVuUA5l8lnBPVhMAdw==", "lastName" : "petb0Qx3YPfebSioY0wL9w==", "code" : "1G1AL55F277253143" }, "source" : ["TV"] }
{ "_id" : { "firstName" : "qhreJVuUA5l8lnBPVhMAdw==", "lastName" : "6LB/NmhbfqTagbOnHFGoog==", "code" : "1GCVKREC0EZ168134" }, "source" : ["TV", "internet"] }
There is a problem with this query; please suggest a fix :(
Your code doesn't work because $cond is not an accumulator operator. Only accumulator operators can be used in a $group stage.
Assuming your records contain not more than two possible values of source as you mention in your question, you could add a conditional $project stage and modify the $group stage as,
Code:
// Pick one source per group, preferring a value other than "email".
// Written for groups that hold at most two source values.
db.customer.aggregate([
  // Remember the first and the last source seen in each group.
  // NOTE(review): `id` is part of the group key; this only de-duplicates if
  // duplicate records share the same `id` — confirm against the data.
  {
    $group: {
      _id: {
        id: "$id",
        firstName: "$firstName",
        lastName: "$lastName",
        code: "$code"
      },
      sourceA: { $first: "$source" },
      sourceB: { $last: "$source" }
    }
  },
  // If the first source is "email", fall back to the last one; otherwise keep the first.
  {
    $project: {
      source: {
        $cond: [{ $eq: ["$sourceA", "email"] }, "$sourceB", "$sourceA"]
      }
    }
  }
])
In case there can be more than two possible values for source, then you could do the following:
Group by the id, firstName, lastName and code. Accumulate
the unique values of source, using the $addToSet operator.
Use $redact to keep only the values other than email.
Project the required fields, if the source array is empty(all the elements have been removed), add a
value email to it.
Unwind the source field to list it as a field and not an array.
(optional)
Code:
// Collect the distinct sources of each group and report every source other
// than "email"; report "email" only when nothing else is left.
db.customer.aggregate([
  // Gather the unique source values of each group as { source: <value> } sub-documents.
  // NOTE(review): `id` is part of the group key; this only de-duplicates if
  // duplicate records share the same `id` — confirm against the data.
  {
    $group: {
      "_id": {
        "id": "$id",
        "firstName": "$firstName",
        "lastName": "$lastName",
        "code": "$code"
      },
      "sourceArr": { $addToSet: { "source": "$source" } }
    }
  },
  // Prune every sub-document whose `source` is "email". Levels without a
  // `source` field evaluate to "other" and are descended into.
  {
    $redact: {
      $cond: [
        { $eq: [{ $ifNull: ["$source", "other"] }, "email"] },
        "$$PRUNE",
        "$$DESCEND"
      ]
    }
  },
  // Flatten the sub-documents to plain values. An empty array means "email"
  // was the only source, so fall back to "email" (the original snippet used
  // the literal "item" here, contradicting the steps described above it).
  {
    $project: {
      "source": {
        $map: {
          "input": {
            $cond: [
              { $eq: [{ $size: "$sourceArr" }, 0] },
              [{ "source": "email" }],
              "$sourceArr"
            ]
          },
          "as": "inp",
          "in": "$$inp.source"
        }
      }
    }
  }
])