Perform search with facets unknown upfront Atlas MongoDB - mongodb

I have the following document structure in MongoDB:
{
// other keys,
tags: [
tagA: "red",
tagB: "green"
]
},
{
// other keys,
tags: [
tagA: "orange",
tagB: "green",
tagC: "car"
]
}
I want to perform a $facets search that gives me the following output (name of each tag + values that occur on that tag + count of these value):
{
[
tagA: {
red: 1,
orange: 1
},
tagB: {
green: 2
},
tagC: {
car: 1
}
]
}
The tricky part is that the facets are unknown upfront (they can vary), and every tutorial I found only works for a predefined set of facets.
Is it possible?
P.S.: how to get the output of this to come alongside with a given query? So that the return is something like:
{
queryResults: [all the results, as in a normal query],
facets: [result showed in accepted answer]
}

If you consider having this as input (i've added bracket around object in your array) :
[
{
tags: [
{
tagA: "red"
},
{
tagB: "green"
}
]
},
{
tags: [
{
tagA: "orange"
},
{
tagB: "green"
},
{
tagC: "car"
}
]
}
]
You could then do an aggregation pipeline as follow :
db.collection.aggregate([
{
"$unwind": "$tags"
},
{
"$addFields": {
"kv": {
"$objectToArray": "$tags"
}
}
},
{
"$unwind": "$kv"
},
{
"$group": {
"_id": {
key: "$kv.k",
value: "$kv.v"
},
"count": {
"$sum": 1
}
}
},
{
"$group": {
"_id": "$_id.key",
"value": {
"$push": {
"k": "$_id.value",
"v": "$count"
}
}
}
},
{
$project: {
val: [
{
k: "$_id",
v: {
"$arrayToObject": "$value"
}
}
]
}
},
{
$project: {
res: {
"$arrayToObject": "$val"
}
}
},
{
$replaceRoot: {
newRoot: "$res"
}
}
])
It would give you this result :
[
{
"tagA": {
"orange": 1,
"red": 1
}
},
{
"tagB": {
"green": 2
}
},
{
"tagC": {
"car": 1
}
}
]
You can see this on mongoplayground : https://mongoplayground.net/p/FZbM-BGJRBm
Hope this answer your question.
Detailled explanation :
I use $unwind on the tags field in order to get one object per object in tags array.
I use $objectToArray to get keys (tagsA, tagsB) as values.
$unwind to go from an array to objets.
$group with $sum accumulator to calculate the occurence of each unique combination.
$group by tagsA,tagsB, etc with $push accumulator to add value in array (will be usufull afterwards)
$arrayToObject to go from array to object
Same
$replaceRoot to display results better.
If you want to understand more each step, consider reading mongo doc of each pipeline aggregator i used. You can also use the mongoplayground link above, delete some code to see what happens after each step.

Related

Variable set of attributes $facets MongoDB [duplicate]

I have the following document structure in MongoDB:
{
// other keys,
tags: [
tagA: "red",
tagB: "green"
]
},
{
// other keys,
tags: [
tagA: "orange",
tagB: "green",
tagC: "car"
]
}
I want to perform a $facets search that gives me the following output (name of each tag + values that occur on that tag + count of these value):
{
[
tagA: {
red: 1,
orange: 1
},
tagB: {
green: 2
},
tagC: {
car: 1
}
]
}
The tricky part is that the facets are unknown upfront (they can vary), and every tutorial I found only works for a predefined set of facets.
Is it possible?
P.S.: how to get the output of this to come alongside with a given query? So that the return is something like:
{
queryResults: [all the results, as in a normal query],
facets: [result showed in accepted answer]
}
If you consider having this as input (i've added bracket around object in your array) :
[
{
tags: [
{
tagA: "red"
},
{
tagB: "green"
}
]
},
{
tags: [
{
tagA: "orange"
},
{
tagB: "green"
},
{
tagC: "car"
}
]
}
]
You could then do an aggregation pipeline as follow :
db.collection.aggregate([
{
"$unwind": "$tags"
},
{
"$addFields": {
"kv": {
"$objectToArray": "$tags"
}
}
},
{
"$unwind": "$kv"
},
{
"$group": {
"_id": {
key: "$kv.k",
value: "$kv.v"
},
"count": {
"$sum": 1
}
}
},
{
"$group": {
"_id": "$_id.key",
"value": {
"$push": {
"k": "$_id.value",
"v": "$count"
}
}
}
},
{
$project: {
val: [
{
k: "$_id",
v: {
"$arrayToObject": "$value"
}
}
]
}
},
{
$project: {
res: {
"$arrayToObject": "$val"
}
}
},
{
$replaceRoot: {
newRoot: "$res"
}
}
])
It would give you this result :
[
{
"tagA": {
"orange": 1,
"red": 1
}
},
{
"tagB": {
"green": 2
}
},
{
"tagC": {
"car": 1
}
}
]
You can see this on mongoplayground : https://mongoplayground.net/p/FZbM-BGJRBm
Hope this answer your question.
Detailled explanation :
I use $unwind on the tags field in order to get one object per object in tags array.
I use $objectToArray to get keys (tagsA, tagsB) as values.
$unwind to go from an array to objets.
$group with $sum accumulator to calculate the occurence of each unique combination.
$group by tagsA,tagsB, etc with $push accumulator to add value in array (will be usufull afterwards)
$arrayToObject to go from array to object
Same
$replaceRoot to display results better.
If you want to understand more each step, consider reading mongo doc of each pipeline aggregator i used. You can also use the mongoplayground link above, delete some code to see what happens after each step.

Convert array of objects into an array of values from that object

I have the following documents in my collection:
{
"archives" : [
{ "colour" : "red", "default" : true },
{ "colour" : "green", "default" : false }
]
}
{
"archives" : [
{ "colour" : "yellow", "default" : true }
]
}
I want to project the colour value from the archive objects as follows:
{
"archives" : [ "red", "green" ]
}
{
"archives" : [ "yellow" ]
}
My proposal
My best attempt at this has been this query:
db.test.find({}, {
'archives': {
'$map': {
'input': '$archives',
'in': '$archives.colour'
}
}
})
But it's returning an array of arrays with redundant information, like so:
{ "archives" : [ [ "red", "green" ], [ "red", "green" ] ] }
{ "archives" : [ [ "yellow" ] ] }
So what would be the correct query to give the result I need, preferably on the database side, and as efficient as possible?
Why not simply this:
db.test.aggregate([
{ $set: { archives: "$archives.colour" } }
])
If you like to use $map, then is would be this one. You missed the $$this variable:
db.test.aggregate([
{
$set: {
archives: {
"$map": {
"input": "$archives",
"in": "$$this.colour"
}
}
}
}
])
or
db.test.aggregate([
{
$set: {
archives: {
"$map": {
"input": "$archives.colour",
"in": "$$this"
}
}
}
}
])
You can use aggregation framework:
$unwind
$group
db.test.aggregate([
{
"$unwind": "$archives"
},
{
"$group": {
"_id": "$_id",
"archives": {
"$push": "$archives.colour"
}
}
}
])
Playground
And if you don't want the _id in the output, you can exclude it by adding an additional $project stage:
db.test.aggregate([
{
"$unwind": "$archives"
},
{
"$group": {
"_id": "$_id",
"archives": {
"$push": "$archives.colour"
}
}
},
{
"$project": {
_id: 0
}
}
])

Mongodb: is it possible to do this in one query?

I am new to Mongodb, Here is my document format:
{
"_id": {
"$oid": "5ee023790a0e502e3a9ce9e7"
},
"data": {
"Quick": [
["1591745491", "4", "uwp"],
["1591745492", "4", "uwp"],
["1591745516", "12", "Word"],
["1591747346", "8", "uwp"]
]
"Key": [
["1591747446", "Num"]
]
"Search": [
["1591745491", "tty"],
["1591745492", "erp"],
["1591745516", "Word"],
["1591747346", "uwp"]
]
},
"devicecode": "MP1G5L9EMP1G5L9E#LENOVO"
}
What I want to do is:
group by devicecode
for each group, count how many times they used "Quick", "key" and "Search" (count how many line under the name)
Currently I am using a python program to get this done. but I believe that should be a way to get it done within Mongodb.
The output format should look like this:
devicecode: MP1G5L9EMP1G5L9E#LENOVO, Quick: 400, key: 350, Search: 660
...
You could use aggregation framework to compute the length of individual arrays in the $set stage and then in the $group stage group-by device while summing up the computed array length values from the previous stage. Finally, in the $project stage map _id to devicecode and deselect _id.
db.getCollection("testcollection").aggregate([
{
$set: {
QuickLen: {
$size: {
$ifNull: [
"$data.Quick",
[]
]
}
},
KeyLen: {
$size: {
$ifNull: [
"$data.Key",
[]
]
}
},
SearchLen: {
$size: {
$ifNull: [
"$data.Search",
[]
]
}
}
}
},
{
$group: {
_id: "$devicecode",
Quick: {
$sum: "$QuickLen"
},
key: {
$sum: "$KeyLen"
},
Search: {
$sum: "$SearchLen"
}
}
},
{
$project: {
devicecode: "$_id",
Quick: 1,
key: 1,
Search: 1,
_id: 0
}
}
])

Mongodb find maximum based on nested object key

I have below schema where I need to identify the object which has highest rank.
{ "team" : {
"member1" : [ { "rank": 2, "goal": 50 } ],
"member2" : [ { "rank": 5, "goal": 30 } ],
"member3" : [ { "rank": 1, "goal": 80 } ]
}}
$unwind will not work on the nested objects. Tried to convert this object as Array and tried to find the max of rank key. Any help would be appreciated.
If the intent is to only find the maximum rank that exists: The idea is a two stage aggregation query using $project and using $objectToArray to have common keys from which $max on required attribute can be applied.
Query: playground link
db.collection.aggregate([
{
$project: {
teamsData: {
$objectToArray: "$team"
}
}
},
{
$project: {
maxRank: {
$max: "$teamsData.v.rank"
}
}
}
]);
To get the object details that has the maximum rank: Use $unwind on the array projected from previous stage to help in sorting by rank $sort and then picking the the first item $first at $group stage.
Query: playgorund link
db.collection.aggregate([
{
$project: {
team: {
$objectToArray: "$team"
}
}
},
{
$unwind: "$team"
},
{
$sort: {
"team.v.rank": -1
}
},
{
$group: {
_id: null,
maxRankObj: {
$first: "$$ROOT"
}
}
}
]);
Sample O/P:
[
{
"_id": null,
"maxRankObj": {
"_id": ObjectId("5a934e000102030405000000"),
"team": {
"k": "member2",
"v": [
{
"goal": 30,
"rank": 5
}
]
}
}
}
]

How to write a custom function to split a document into multiple documents of same Id

I am trying to split a document which has the following fields of string type:
{
"_id" : "17121",
"firstName": "Jello",
"lastName" : "New",
"bio" :"He is a nice person."
}
I want to split the above document into three new documents For Example:
{
"_id": "17121-1",
"firstName": "Jello"
}
{
"_id": "17121-2",
"firstName": "New"
}
{
"_id": "17121-3",
"bio": "He is a nice person."
}
Can anyone suggest how to proceed?
db.coll1.find().forEach(function(obj){
// I want to extract every single field. How to iterate on the field within this Bson object(obj) to collect every field.?
});
or any suggestion to do with aggregation pipeline in MongoDB.
You can use the below aggregation query.
The below query will convert each document fields into key value document array followed by $unwind while keeping the index and $replaceRoot with merge to produce the desired output.
$objectToArray to produce array (keyvalarr) with key (name of the array field)-value (array field) pair.
$match to remove the _id key value document.
$arrayToObject to produce the named key value while adding new _id key value pair and flatten array key values.
db.coll.aggregate([
{
"$project": {
"keyvalarr": {
"$objectToArray": "$$ROOT"
}
}
},
{
"$unwind": {
"path": "$keyvalarr",
"includeArrayIndex": "index"
}
},
{
"$match": {
"keyvalarr.k": {
"$ne": "_id"
}
}
},
{
"$replaceRoot": {
"newRoot": {
"$arrayToObject": [
{
"k": "_id",
"v": {
"$concat": [
{
"$substr": [
"$_id",
0,
-1
]
},
"-",
{
"$substr": [
"$index",
0,
-1
]
}
]
}
},
"$keyvalarr"
]
}
}
}
])
Anu. Here are two options you can use.
The first option is pretty straightforward, but it requires you to hardcode _id' indexes yourself.
db.users.aggregate([
{
$project: {
pairs : [
{ firstName: '$firstName', _id : { $concat : [ { $substr : [ '$_id', 0, 50 ] }, '-1' ] } },
{ lastName: '$lastName', _id : { $concat : [ '$_id', '-2' ] } },
{ bio: '$bio', _id : { $concat : [ { $substr : [ '$_id', 0, 50 ] }, '-3' ] } }
]
}
},
{
$unwind : '$pairs'
},
{
$replaceRoot: { newRoot: '$pairs' }
}
])
The second option does a little bit more job and is somewhat more tricky. But it is probably easier to extend if you ever need to add another field.
db.users.aggregate([
{
$project: {
pairs : [
{ firstName: '$firstName' },
{ lastName: '$lastName' },
{ bio: '$bio' }
]
}
},
{
$addFields: {
pairsReference : '$pairs'
}
},
{
$unwind: '$pairs'
},
{
$addFields: {
'pairs._id' : { $concat: [ { $substr : [ '$_id', 0, 50 ] }, '-', { $substr: [ { $indexOfArray : [ '$pairsReference', '$pairs' ] }, 0, 2 ] } ] }
}
},
{
$replaceRoot: { newRoot: '$pairs' }
}
])
You can redirect results of both queries into another collection by using $out stage.
UPD:
The only reason you get the error is that one of the _ids is not a string.
Replace the first parameter of $concat ($_id) with the following expression:
{ $substr : [ '$_id', 0, 50 ] }