Find MongoDB object using value of another field - mongodb

I recently found difficulty in finding an object stored in a document with its key in another field of that same document.
{
list : {
"red" : 397n8,
"blue" : j3847,
"pink" : 8nc48,
"green" : 983c4,
},
result : [
{ "id" : 397n8, value : "anger" },
{ "id" : j3847, value : "water" },
{ "id" : 8nc48, value : "girl" },
{ "id" : 983c4, value : "evil" }
]
}
}
I am trying to get the value for 'blue' which has an id of 'j3847' and a value of 'water'.
db.docs.find( { result.id : list.blue }, { result.value : 1 } );
# list.blue would return water
# list.pink would return girl
# list.green would return evil
I tried many things and even found a great article on how to update a value using a value in the same document.: Update MongoDB field using value of another field which I based myself on; with no success... :/
How can I find a MongoDB object using value of another field ?

You can do it with the $filter operator within mongo aggregation. It returns an array with only those elements that match the condition:
db.docs.aggregate([
{
$project: {
result: {
$filter: {
input: "$result",
as:"item",
cond: { $eq: ["$list.blue", "$$item.id"]}
}
}
}
}
])
Output for this query looks like this:
{
"_id" : ObjectId("569415c8299692ceedf86573"),
"result" : [ { "id" : "j3847", "value" : "water" } ]
}

One way is using the $where operator though would not recommend as using it invokes a full collection scan regardless of what other conditions could possibly use an index selection and also invokes the JavaScript interpreter over each result document, which is going to be considerably slower than native code.
That being said, use the alternative .aggregate() method for this type of comparison instead which is definitely the better option:
db.docs.aggregate([
{ "$unwind": "$result" },
{
"$project": {
"result": 1,
"same": { "$eq": [ "$list.blue", "$result.id" ] }
}
},
{ "$match": { "same": true } },
{
"$project": {
"_id": 0,
"value": "$result.value"
}
}
])
When the $unwind operator is applied on the result array field, it will generate a new record for each and every element of the result field on which unwind is applied. It basically flattens the data and then in the subsequent $project step inspect each member of the array to compare if the two fields are the same.
Sample Output
{
"result" : [
{
"value" : "water"
}
],
"ok" : 1
}
Another alternative is to use the $map and $setDifference operators in a single $project step where you can avoid the use of $unwind which can be costly on very large collections and in most cases result in the 16MB BSON limit constraint:
db.docs.aggregate([
{
"$project": {
"result": {
"$setDifference": [
{
"$map": {
"input": "$result",
"as": "r",
"in": {
"$cond": [
{ "$eq": [ "$$r.id", "$list.blue" ] },
"$$r",
false
]
}
}
},
[false]
]
}
}
}
])
Sample Output
{
"result" : [
{
"_id" : ObjectId("569412e5a51a6656962af1c7"),
"result" : [
{
"id" : "j3847",
"value" : "water"
}
]
}
],
"ok" : 1
}

Related

Querying a multi-nested array in MongoDb 3.4.2

MongoDB Version - 3.4.2
I'm trying to query within the Sitecore Analytics database, trying to retrieve all users that are associated with a given List Id.
The example dataset I have follows the default Sitecore Analytics setup:
"Tags" : {
"Entries" : {
"ContactLists" : {
"Values" : {
"0" : {
"Value" : "{1E2D1AB7-72A0-4FF7-906B-DCDC020B87D2}",
"DateTime" : ISODate("2020-10-23T17:38:13.891Z")
},
"1" : {
"Value" : "{28BECCD3-476B-4B1D-9A75-02E59EF21286}",
"DateTime" : ISODate("2018-04-18T14:22:41.763Z")
},
"2" : {
"Value" : "{2C2BB0C3-483D-490E-B93A-9155BFBBE5DC}",
"DateTime" : ISODate("2018-05-10T14:26:08.494Z")
},
"3" : {
"Value" : "{DBE480F6-E305-4B35-9E6D-CBED64F4E44F}",
"DateTime" : ISODate("2018-10-27T02:41:28.776Z")
},
}
}
}
},
I want to iterate through all the entries within Values without having to specify 0/1/2/3, avoiding the following:
db.getCollection('Contacts').find({"Tags.Entries.ContactLists.Values.1.Value": "{28BECCD3-476B-4B1D-9A75-02E59EF21286}"})
I've tried the following:
db.getCollection('Contacts').find({"Tags.Entries.ContactLists.Values": {$elemMatch : {"Value":"{28BECCD3-476B-4B1D-9A75-02E59EF21286}"}}})
db.getCollection('Contacts').find({'Tags' : {$elemMatch : {$all : ['{28BECCD3-476B-4B1D-9A75-02E59EF21286}']}}})
db.getCollection('Contacts').forEach(function (doc) {
for(var i in doc.Tags.Entries.ContactLists.Values)
{
doc.Tags.Entries.ContactLists.Values[i].Value = "{28BECCD3-476B-4B1D-9A75-02E59EF21286}";
}
})
And a few other variations which I cannot recall now. And none work.
Any ideas if this is possible or on how to do this?
I want the outcome to just show filter out the results showing only the entries containing the matching GUID
Many thanks!
Demo - https://mongoplayground.net/p/upgYxgzPwJQ
It can be done using aggregation pipeline
Use $objectToArray to convert array
Use $filter to filter the array
db.collection.aggregate([
{
$addFields: {
filteredValue: {
$filter: {
input: {
$objectToArray: "$Tags.Entries.ContactLists.Values"
},
as: "val",
cond: {
$eq: [ // filter condition
"$$val.v.Value",
"{28BECCD3-476B-4B1D-9A75-02E59EF21286}"
]
}
}
}
}
}
])
Output -
[
{
"Tags": {
"Entries": {
"ContactLists": {
"Values": {
"0": {
"DateTime": ISODate("2020-10-23T17:38:13.891Z"),
"Value": "{1E2D1AB7-72A0-4FF7-906B-DCDC020B87D2}"
},
"1": {
"DateTime": ISODate("2018-04-18T14:22:41.763Z"),
"Value": "{28BECCD3-476B-4B1D-9A75-02E59EF21286}"
},
"2": {
"DateTime": ISODate("2018-05-10T14:26:08.494Z"),
"Value": "{2C2BB0C3-483D-490E-B93A-9155BFBBE5DC}"
},
"3": {
"DateTime": ISODate("2018-10-27T02:41:28.776Z"),
"Value": "{DBE480F6-E305-4B35-9E6D-CBED64F4E44F}"
}
}
}
}
},
"_id": ObjectId("5a934e000102030405000000"),
"filteredValue": [
{
"k": "1",
"v": {
"DateTime": ISODate("2018-04-18T14:22:41.763Z"),
"Value": "{28BECCD3-476B-4B1D-9A75-02E59EF21286}"
}
}
]
}
]
You can not use $elemMatch because Values is not array, but object. You can solve the problem with Aggregation Pipeline:
$addFields to add new field Values_Array that will be array representation of Values object.
$objectToArray to transform Values object to array
$match to find all documents that has requested value in new Values_Array field
$project to specify which properties to return from the result
db.getCollection('Contacts').aggregate([
{
"$addFields": {
"Values_Array": {
"$objectToArray": "$Tags.Entries.ContactLists.Values"
}
}
},
{
"$match": {
"Values_Array.v.Value": "{28BECCD3-476B-4B1D-9A75-02E59EF21286}"
}
},
{
"$project": {
"Tags": 1
}
}
])
Here is the working example: https://mongoplayground.net/p/2gY-vu3Qrvz

MongoDB - Find documents matching certain condition for unknown field keys

How can I query a MongoDB collection to find documents with a structure as below? The documents have a field called thing which is a subdocument, and the keys for this field are a form of ID number which will generally not be known by the person writing the query (making dot notation difficult and I assume impossible).
{
"_id" : 3,
"_id2" : 234,
"thing":
{
"2340945683":
{"attribute1": "typeA",
"attribute2": "typeB",
"attribute3": "typeA"
},
"349687346":
{"attribute1": "typeC",
"attribute2": "typeB",
"attribute3": "typeA"
}
},
"username": "user1"
}
Say I want to set a filter which will return the document only if some one or more of the fields within thing have the condition "attribute1" : "typeC"?
I need something like
db.collection.find( {thing.ANY_FIELD: $elemMatch:{"attribute1":"typeC"}})
You need to start with $objectToArray to read your keys dynamically. Then you can $map properties along with $anyElementTrue to detect if there's any nested field in thing containing {"attribute1":"typeC"}:
db.collection.aggregate([
{
$match: {
$expr: {
$anyElementTrue: {
$map: {
input: { $objectToArray: "$thing" },
in: { $eq: [ "$$this.v.attribute1", "typeC" ] }
}
}
}
}
}
])
Mongo Playground
My solution to this was to use two aggregate operations, the first one is called objectToArray and it's purpose is to convert a object into a list of objects with keys and values (see the documentation examples), and the reduce to search in this array of key-values, at the end we end up with a boolean "hasAttribute" indicating that the one field matched the value wee are looking for.
Here is the solution:
db.getCollection("thing").aggregate([
{
$addFields: {
hasAttribute: {
$reduce: {
input: {
$objectToArray: "$thing"
},
initialValue: false,
in: {$or: ["$$value", {$eq: ["typeC", "$$this.v.attribute1"]}]}
}
}
}
},
{
$match: {
hasAttribute: true
}
}
])
Here is the sample output and how the boolean value behaves:
{
"_id" : ObjectId("5ddd63c02e5c579c5076c76f"),
"thing" : {
"349687346" : {
"attribute1" : "typeC",
"attribute2" : "typeB",
"attribute3" : "typeA"
},
"2340945683" : {
"attribute1" : "typeA",
"attribute2" : "typeB",
"attribute3" : "typeA"
}
},
"hasAttribute" : true
}
// ----------------------------------------------
{
"_id" : ObjectId("5ddd63d12e5c579c5076c770"),
"thing" : {
"2340945683" : {
"attribute1" : "typeA",
"attribute2" : "typeB",
"attribute3" : "typeA"
}
},
"hasAttribute" : false
}
// ----------------------------------------------
{
"_id" : ObjectId("5ddd63d12e5c579c5076c771"),
"thing" : {
"349687346" : {
"attribute1" : "typeC",
"attribute2" : "typeB",
"attribute3" : "typeA"
}
},
"hasAttribute" : true
}
Ask for clarifications if you need!

MongoDB projection on specific nested properties

"data" : {
"visits" : {
"daily" : {
"2018-09-05" : 3586,
"2018-09-06" : 2969,
"2018-09-07" : 2624,
"2018-09-08" : 2803,
"2018-09-09" : 3439,
"2018-09-10" : 3655
}
}
},
I have property structure in MongoDB like this, what I am trying to do is, if i have start date and end date, for example (2018-09-06 - 2018-09-07),
I want to get result in this format
"data" : {
"visits" : {
"daily" : {
"2018-09-06" : 2969,
"2018-09-07" : 2624
}
}
},
Is there any efficient way to do it dynamically? I can do it by putting in projections things like this {"data.visits.daily.2018-09-06": 1, "data.visits.daily.2018-09-07": 1} and it works but it doesn't seem to me like a good solution.
Using MongoDB 3.4.4 and newer versions:
db.collection.aggregate([
{ "$addFields": {
"data.visits.daily": {
"$arrayToObject": {
"$filter": {
"input": { "$objectToArray": "$data.visits.daily" },
"as": "el",
"cond": {
"$and": [
{ "$gte": ["$$el.k", "2018-09-06"] },
{ "$lte": ["$$el.k", "2018-09-07"] },
]
}
}
}
}
} }
])
The above pipeline will yield the final output
{
"data" : {
"visits" : {
"daily" : {
"2018-09-06" : 2969,
"2018-09-07" : 2624
}
}
}
}
Explanations
The pipeline can be decomposed to show each individual operator's results.
$objectToArray
$objectToArray enables you to transform the document with dynamic keys
into an array that contains a element for each field/value pair in the original document. Each element in the return array is a document that contains two fields k and v.
Running the pipeline with just the operator in a $project stage
db.collection.aggregate([
{ "$project": {
"keys": { "$objectToArray": "$data.visits.daily" }
} }
])
yields
{
"_id" : ObjectId("5bab6d09b1951fef20a5dce4"),
"keys" : [
{
"k" : "2018-09-05",
"v" : 3586
},
{
"k" : "2018-09-06",
"v" : 2969
},
{
"k" : "2018-09-07",
"v" : 2624
},
{
"k" : "2018-09-08",
"v" : 2803
},
{
"k" : "2018-09-09",
"v" : 3439
},
{
"k" : "2018-09-10",
"v" : 3655
}
]
}
$filter
The $filter operator acts as a filtering mechanism for the array produced by the $objectToArray operator, works by selecting a subset of the array to return based on the specified condition which
becomes your query.
Consider the following pipeline which returns an array of the key/value pair that matches the condition "2018-09-06" <= key <= "2018-09-07"
db.collection.aggregate([
{ "$project": {
"keys": {
"$filter": {
"input": { "$objectToArray": "$data.visits.daily" },
"as": "el",
"cond": {
"$and": [
{ "$gte": ["$$el.k", "2018-09-06"] },
{ "$lte": ["$$el.k", "2018-09-07"] },
]
}
}
}
} }
])
which yields
{
"_id" : ObjectId("5bab6d09b1951fef20a5dce4"),
"keys" : [
{
"k" : "2018-09-06",
"v" : 2969
},
{
"k" : "2018-09-07",
"v" : 2624
}
]
}
$arrayToObject
This will transform the filtered array above from
[
{
"k" : "2018-09-06",
"v" : 2969
},
{
"k" : "2018-09-07",
"v" : 2624
}
]
to the original document with the dynamic key
{
"2018-09-06" : 2969,
"2018-09-07" : 2624
}
so running the pipeline
db.collection.aggregate([
{ "$project": {
"keys": {
"$arrayToObject": {
"$filter": {
"input": { "$objectToArray": "$data.visits.daily" },
"as": "el",
"cond": {
"$and": [
{ "$gte": ["$$el.k", "2018-09-06"] },
{ "$lte": ["$$el.k", "2018-09-07"] },
]
}
}
}
}
} }
])
will produce
{
"_id" : ObjectId("5bab6d09b1951fef20a5dce4"),
"keys" : {
"2018-09-06" : 2969,
"2018-09-07" : 2624
}
}
But of course you would want to preserve the original schema i.e. the current fields so you would need to use $addFields instead of the $project pipeline used for illustrated.
$addFields
This is is equivalent to a $project stage that explicitly specifies all existing fields in the input documents and adds the new fields. Specifying an existing field name in an $addFields operation causes the original field to be replaced and you would need to use dot notation to to update the embedded data.visits.daily field with the dynamic keys.
You can achieve this using the following aggregation :
var startdate = "2018-09-06";
var enddate = "2018-09-09";
db['01'].aggregate(
[
{
$project: {
daily:{$objectToArray:"$data.visits.daily"}
}
},
{
$unwind: {
path : "$daily",
}
},
{
$addFields: {
"date": {$dateFromString:{dateString:"$daily.k",format:"%Y-%m-%d"}}
}
},
{
$match: {
$and:[{date:{$gte:new Date(startdate)}},{date:{$lte:new Date(enddate)}}]
}
},
{
$group: {
_id:"_id",
daily:{$push:"$daily"}
}
},
{
$project: {
"data.visits.daily":{$arrayToObject:"$daily"}
}
},
]
);

how to count number of keys in subdocument using aggregation pipeline?

Suppose I have a document like this:
{
"_id" : ObjectId("57eb386e37b4842ff5f386c9"),
"lesson_id" : ObjectId("57e27cd190e6993e393f5c74"),
"student_id" : ObjectId("57d3c3f590e6995fe8de7932"),
"answer_records" : {
"1" : {
"answer" : [
"A"
]
},
"3" : {
"answer" : [
"C"
]
}
}
I want to count the number of answer records in the collection. Apparently, this document contribute two answer records which are "1" and "3". So, my question is how to achieve this using aggregation pipeline.
In your case, it is far easier to just use JS.
On the mongo shell :
var json=db.sof.findOne().answer_records;
Object.keys(json).length;
Prints 2 for the number of answer records in the said document.
For MongoDB 3.6 and newer, use the $objectToArray operator within an aggregation pipeline to convert the document to an array. The return array contains an element for each field/value pair in the original document. Each element in the return array is a document that contains two fields k and v.
On getting the array, you can then leverage the use of $addFields pipeline step to create a field that holds the counts and the actual count is derived with the use of the $size operator.
All this can be done in a single pipeline by nesting the expressions as follows:
db.collection.aggregate([
{
"$addFields": {
"answers_count": {
"$size": {
"$objectToArray": "$answer_records"
}
}
}
}
])
Sample Output
{
"_id" : ObjectId("57eb386e37b4842ff5f386c9"),
"lesson_id" : ObjectId("57e27cd190e6993e393f5c74"),
"student_id" : ObjectId("57d3c3f590e6995fe8de7932"),
"answer_records" : {
"1" : {
"answer" : [
"A"
]
},
"3" : {
"answer" : [
"C"
]
}
},
"answers_count": 2
}
For MongoDB server versions which do not support the above operators, you would need to change your schema design in order to carry out efficient queries with the aggregation framework. As it is currently you'd need
to preprocess the documents either on the client or server with JavaScript thus you won't be able to fully utilise MongoDB's better infrastructure built for faster querying.
The ideal design follows:
{
"_id" : ObjectId("57eb386e37b4842ff5f386c9"),
"lesson_id" : ObjectId("57e27cd190e6993e393f5c74"),
"student_id" : ObjectId("57d3c3f590e6995fe8de7932"),
"answer_records" : [
{ "id": "1", "answer": "A" }
{ "id": "3", "answer": "C" }
]
}
which you can then simply apply the aggregation's $project pipeline that uses the $size operator to return the length of the answer_records array per document:
db.collection.aggregate([
{
"$project": {
"lesson_id": 1,
"student_id": 1,
"count": { "$size": "$answer_records" }
}
}
])
If you want the total number of answer records for the whole collection then add another $group pipeline to get the accumulated total for all the documents using an _id of null:
db.collection.aggregate([
{
"$project": {
"count": { "$size": "$answer_records" }
}
},
{
"$group": {
"_id": null,
"total_answers": { "$sum": "$count" }
}
}
])
Otherwise with the current design your only option is MapReduce which is much slower:
db.collection.mapReduce(
function() {
emit(this._id, Object.keys(this.answer_records).length);
},
function() { },
{ "out": { "inline": 1 } }
)
Sample Output:
{
"results" : [
{
"_id" : ObjectId("57eb386e37b4842ff5f386c9"),
"value" : 2
}
],
....
}
To get the total for all the documents in the collection then run this mapReduce operation:
db.collection.mapReduce(
function() {
emit(null, Object.keys(this.answer_records).length);
},
function(key, values) {
return Array.sum(values);
},
{ "out": { "inline": 1 } }
)

How to assign weights to searched documents in MongoDb?

This might sounds like simple question for you but i have spend over 3 hours to achieve it but i got stuck in mid way.
Inputs:
List of keywords
List of tags
Problem Statement: I need to find all the documents from the database which satisfy following conditions:
List documents that has 1 or many matching keywords. (achieved)
List documents that has 1 or many matching tags. (achieved)
Sort the found documents on the basis of weights: Each keyword matching carry 2 points and each tag matching carry 1 point.
Query: How can i achieve requirement#3.
My Attempt: In my attempt i am able to list only on the basis of keyword match (that too without multiplying weight with 2 ).
tags are array of documents. Structure of each tag is like
{
"id" : "ICC",
"some Other Key" : "some Other value"
}
keywords are array of string:
["women", "cricket"]
Query:
var predicate = [
{
"$match": {
"$or": [
{
"keywords" : {
"$in" : ["cricket", "women"]
}
},
{
"tags.id" : {
"$in" : ["ICC"]
}
}
]
}
},
{
"$project": {
"title":1,
"_id": 0,
"keywords": 1,
"weight" : {
"$size": {
"$setIntersection" : [
"$keywords" , ["cricket","women"]
]
}
},
"tags.id": 1
}
},
{
"$sort": {
"weight": -1
}
}
];
It seems that you were close in your attempt, but of course you need to implement something to "match your logic" in order to get the final "score" value you want.
It's just a matter of changing your projection logic a little, and assuming that both "keywords" and "tags" are arrays in your documents:
db.collection.aggregate([
// Match your required documents
{ "$match": {
"$or": [
{
"keywords" : {
"$in" : ["cricket", "women"]
}
},
{
"tags.id" : {
"$in" : ["ICC"]
}
}
]
}},
// Inspect elements and create a "weight"
{ "$project": {
"title": 1,
"keywords": 1,
"tags": 1,
"weight": {
"$add": [
{ "$multiply": [
{"$size": {
"$setIntersection": [
"$keywords",
[ "cricket", "women" ]
]
}}
,2] },
{ "$size": {
"$setIntersection": [
{ "$map": {
"input": "$tags",
"as": "t",
"in": "$$t.id"
}},
["ICC"]
]
}}
]
}
}},
// Then sort by that "weight"
{ "$sort": { "weight": -1 } }
])
So it is basicallt the $map logic here that "transforms" the other array to just give the id values for comparison against the "set" solution that you want.
The $add operator provides the additional "weight" to the member you want to "weight" your responses by.