MongoDB find count of all possible 'columns' within a collection - mongodb

Is there a way to find all possible number of 'columns' or json properties available in a collection? (I know it's not correct to call them columns, but just for the ease of understanding)
For example, all the following documents are in the same collection called 'people':
{"Name": "bob", "Profession": "IT", "Height": 200},
{"Name": "simon", "Weight": 100, "IQ": 120},
{"Name": "james", "Weight": 130, "Glasses": "Yes"}
The possible 'columns' here are: Name, Profession, Height, Weight, IQ and Glasses. A total of 6.
Is there any way I can do an operation which gets this count of 6? (extra useful if there's also a pymongo variant)
I'm wanting to transfer data from MongoDB into a table format, and knowing the overall number of columns the table can have is useful.

You can use this aggregation query to get your desired result:
The trick here is to use $objectToArray to get the keys as values. Then remove the key _id (if exists) and group to get the total.
db.collection.aggregate([
{
"$project": {
"keys": {
"$objectToArray": "$$ROOT"
}
}
},
{
"$unwind": "$keys"
},
{
"$match": {
"keys.k": {
"$ne": "_id"
}
}
},
{
"$group": {
"_id": "$keys.k",
"total": {
"$sum": 1
}
}
},
{
"$group": {
"_id": null,
"total": {
"$sum": 1
}
}
}
])
Example here
Edit:
Another way to avoid $unwind and double $group id this query:
The idea is the same as before, use $objectToArray to get the keys as key.k and then $group all values and add into an array.
Then get the size of the array after to do some calculations: A $reduce to flatten the array and $filter to not get the _id field.
Note that if you want to count the _id you can simply remove the $filter stage like this example
db.collection.aggregate([
{
"$project": {
"keys": {
"$objectToArray": "$$ROOT"
}
}
},
{
"$group": {
"_id": null,
"keys": {
"$addToSet": "$keys.k"
}
}
},
{
"$project": {
"_id": 0,
"keys": {
"$size": {
"$filter": {
"input": {
"$reduce": {
"input": "$keys",
"initialValue": [],
"in": {
"$setUnion": [
"$$value",
"$$this"
]
}
}
},
"cond": {
"$ne": [
"$$this",
"_id"
]
}
}
}
}
}
}
])
Example here

Related

Can I get the count of subdocuments that match a filter?

I have the following document
[
{
"_id": "624713340a3d2901f2f5a9c0",
"username": "fotis",
"exercises": [
{
"_id": "624713530a3d2901f2f5a9c3",
"description": "Sitting",
"duration": 60,
"date": "2022-03-24T00:00:00.000Z"
},
{
"_id": "6247136a0a3d2901f2f5a9c6",
"description": "Coding",
"duration": 999,
"date": "2022-03-31T00:00:00.000Z"
},
{
"_id": "624713a00a3d2901f2f5a9ca",
"description": "Sitting",
"duration": 999,
"date": "2022-03-30T00:00:00.000Z"
}
],
"__v": 3
}
]
And I am trying to get the count of exercises returned with the following aggregation (I know it is way easier to do it in my code, but I am trying to understand how to use mongodb queries)
db.collection.aggregate([
{
"$match": {
"_id": "624713340a3d2901f2f5a9c0"
}
},
{
"$project": {
"username": 1,
"exercises": {
"$slice": [
{
"$filter": {
"input": "$exercises",
"as": "exercise",
"cond": {
"$eq": [
"$$exercise.description",
"Sitting"
]
}
}
},
1
]
},
"count": {
"$size": "exercises"
}
}
}
])
When I try to access the exercises field using "$size": "exercises", I get an error query failed: (Location17124) Failed to optimize pipeline :: caused by :: The argument to $size must be an array, but was of type: string.
But when I access the subdocument exercises using "$size": "$exercises" I get the count of all the subdocuments contained in the document.
Note: I know that in this example I use $slice and I set the limit to 1, but in my code it is a variable.
You are actually on the right track. You don't really need the $slice. You can just use $reduce to perform the filtering. The reason that your count is not working is that the filtering and the $size are in the same stage. In such case, it will take the pre-filtered array to do the count. You can resolve this by adding a $addFields stage.
db.collection.aggregate([
{
"$match": {
"_id": "624713340a3d2901f2f5a9c0"
}
},
{
"$project": {
"username": 1,
"exercises": {
"$filter": {
"input": "$exercises",
"as": "exercise",
"cond": {
"$eq": [
"$$exercise.description",
"Sitting"
]
}
}
}
}
},
{
"$addFields": {
"count": {
$size: "$exercises"
}
}
}
])
Here is the Mongo playground for your reference.

MongoDB: single find request to return data from different documents with different fields

I have this collection:
{
"name": "Leonardo",
"height": "180",
"weapon": "sword",
"favorite_pizza": "Hawai"
},
{
"name": "Donatello",
"height": "181",
"weapon": "stick",
"favorite_pizza": "Pepperoni"
},
{
"name": "Michelangelo",
"height": "182",
"weapon": "nunchucks",
"favorite_pizza": "Bacon"
},
{
"name": "Raphael",
"height": "183",
"weapon": "sai",
"favorite_pizza": "Margherita"
}
With using one query I want this result (ordered by height):
{
"name": "Leonardo",
"height": "180",
"weapon": "sword",
"favorite_pizza": "Hawai"
},
{
"name": "Donatello",
},
{
"name": "Michelangelo",
},
{
"name": "Raphael",
}
So the query needs to first get the document which has smallest height field and then get all contents of that document, then it needs to get all other documents and return only name field of those documents, while ordering those documents by height.
Change your height to numeric for correct sorting and you can try below aggregation in 3.4 pipeline.
The query $sorts the document by "height" ascending followed by $group to create two fields, "first" field which has the smallest height record ($$ROOT to access the whole document) and "allnames" to record all names.
$project with $slice + $concatArrays to replace the "allnames" array first element with the smallest height document and get the updated array.
$unwind with $replaceRoot to promote all the docs to top level.
db.colname.aggregate([
{"$sort":{
"height":1
}},
{"$group":{
"_id":null,
"first":{"$first":"$$ROOT"},
"allnames":{"$push":{"name":"$name"}}
}},
{"$project":{
"data":{"$concatArrays":[["$first"],{"$slice":["$allnames",1,{"$size":"$allnames"}] } ]}
}},
{"$unwind":"$data"},
{"$replaceRoot":{"newRoot":"$data"}}
])
Just for completeness reasons...
#Veeram's answer is probably the better choice (I have a feeling it should be faster and easier to understand) but you can achieve the same result using a slightly simpler $group stage followed by slightly more complex $project stage using $reduce:
collection.aggregate([{
$sort: {
"height": 1
}
}, {
$group: {
"_id":null,
"allnames": {
$push: "$$ROOT"
}
}
}, {
$project: {
"data": {
$reduce: {
input: "$allnames",
initialValue: null,
in: {
$cond: [{
$eq: [ "$$value", null ] // if it's the first time we come here
},
[ "$$this" ], // we include the entire document
{
$concatArrays: [ // else we concat
"$$value", // the already concatenated values
[ { "name": "$$this.name" } ] // with the "name" of the currently looked at document
]
}]
}
}
}
}
}, {
$unwind: "$data"
}, {
$replaceRoot: {
"newRoot": "$data"
}
}])
Alternatively - as pointed out by #Veeram in the comment below - , it's possible to write the $reduce in this way:
$project: {
"data": {
$reduce: {
input: { "$slice": [ "$allnames", 1, { $size: "$allnames" } ] }, // process everything in the "allnames" array except for the first item
initialValue: { "$slice": [ "$allnames", 1 ] }, // start with the first item
in: { $concatArrays: [ "$$value", [ { "name": "$$this.name" } ] ]} // and keep appending the "name" field of all other items only
}
}
}

How to find match in documents in Mongo and Mongo aggregation?

I have following json structure in mongo collection-
{
"students":[
{
"name":"ABC",
"fee":1233
},
{
"name":"PQR",
"fee":345
}
],
"studentDept":[
{
"name":"ABC",
"dept":"A"
},
{
"name":"XYZ",
"dept":"X"
}
]
},
{
"students":[
{
"name":"XYZ",
"fee":133
},
{
"name":"LMN",
"fee":56
}
],
"studentDept":[
{
"name":"XYZ",
"dept":"X"
},
{
"name":"LMN",
"dept":"Y"
},
{
"name":"ABC",
"dept":"P"
}
]
}
Now I want to calculate following output.
if students.name = studentDept.name
so my result should be as below
{
"name":"ABC",
"fee":1233,
"dept":"A",
},
{
"name":"XYZ",
"fee":133,
"dept":"X"
}
{
"name":"LMN",
"fee":56,
"dept":"Y"
}
Do I need to use mongo aggregation or is it possible to get above given output without using aggregation???
What you are really asking here is how to make MongoDB return something that is actually quite different from the form in which you store it in your collection. The standard query operations do allow a "limitted" form of "projection", but even as the title on the page shared in that link suggests, this is really only about "limiting" the fields to display in results based on what is present in your document already.
So any form of "alteration" requires some form of aggregation, which with both the aggregate and mapReduce operations allow to "re-shape" the document results into a form that is different from the input. Perhaps also the main thing people miss with the aggregation framework in particular, is that it is not just all about "aggregating", and in fact the "re-shaping" concept is core to it's implementation.
So in order to get results how you want, you can take an approach like this, which should be suitable for most cases:
db.collection.aggregate([
{ "$unwind": "$students" },
{ "$unwind": "$studentDept" },
{ "$group": {
"_id": "$students.name",
"tfee": { "$first": "$students.fee" },
"tdept": {
"$min": {
"$cond": [
{ "$eq": [
"$students.name",
"$studentDept.name"
]},
"$studentDept.dept",
false
]
}
}
}},
{ "$match": { "tdept": { "$ne": false } } },
{ "$sort": { "_id": 1 } },
{ "$project": {
"_id": 0,
"name": "$_id",
"fee": "$tfee",
"dept": "$tdept"
}}
])
Or alternately just "filter out" the cases where the two "name" fields do not match and then just project the content with the fields you want, if crossing content between documents is not important to you:
db.collection.aggregate([
{ "$unwind": "$students" },
{ "$unwind": "$studentDept" },
{ "$project": {
"_id": 0,
"name": "$students.name",
"fee": "$students.fee",
"dept": "$studentDept.dept",
"same": { "$eq": [ "$students.name", "$studentDept.name" ] }
}},
{ "$match": { "same": true } },
{ "$project": {
"name": 1,
"fee": 1,
"dept": 1
}}
])
From MongoDB 2.6 and upwards you can even do the same thing "inline" to the document between the two arrays. You still want to reshape that array content in your final output though, but possible done a little faster:
db.collection.aggregate([
// Compares entries in each array within the document
{ "$project": {
"students": {
"$map": {
"input": "$students",
"as": "stu",
"in": {
"$setDifference": [
{ "$map": {
"input": "$studentDept",
"as": "dept",
"in": {
"$cond": [
{ "$eq": [ "$$stu.name", "$$dept.name" ] },
{
"name": "$$stu.name",
"fee": "$$stu.fee",
"dept": "$$dept.dept"
},
false
]
}
}},
[false]
]
}
}
}
}},
// Students is now an array of arrays. So unwind it twice
{ "$unwind": "$students" },
{ "$unwind": "$students" },
// Rename the fields and exclude
{ "$project": {
"_id": 0,
"name": "$students.name",
"fee": "$students.fee",
"dept": "$students.dept"
}},
])
So where you want to essentially "alter" the structure of the output then you need to use one of the aggregation tools to do. And you can, even if you are not really aggregating anything.

Mongodb array concatenation

When querying mongodb, is it possible to process ("project") the result so as to perform array concatenation?
I actually have 2 different scenarios:
(1) Arrays from different fields:, e.g:
Given:
{companyName:'microsoft', managers:['ariel', 'bella'], employees:['charlie', 'don']}
{companyName:'oracle', managers:['elena', 'frank'], employees:['george', 'hugh']}
I'd like my query to return each company with its 'managers' and 'employees' concatenated:
{companyName:'microsoft', allPersonnel:['ariel', 'bella','charlie', 'don']}
{companyName:'oracle', allPersonnel:['elena', 'frank','george', 'hugh']}
(2) Nested arrays:, e.g.:
Given the following docs, where employees are separated into nested arrays (never mind why, it's a long story):
{companyName:'microsoft', personnel:[ ['ariel', 'bella'], ['charlie', 'don']}
{companyName:'oracle', personnel:[ ['elena', 'frank'], ['george', 'hugh']}
I'd like my query to return each company with a flattened 'personal' array:
{companyName:'microsoft', allPersonnel:['ariel', 'bella','charlie', 'don']}
{companyName:'oracle', allPersonnel:['elena', 'frank','george', 'hugh']}
I'd appreciate any ideas, using either 'find' or 'aggregate'
Thanks a lot :)
Of Course in Modern MongoDB releases we can simply use $concatArrays here:
db.collection.aggregate([
{ "$project": {
"companyNanme": 1,
"allPersonnel": { "$concatArrays": [ "$managers", "$employees" ] }
}}
])
Or for the second form with nested arrays, using $reduce in combination:
db.collection.aggregate([
{ "$project": {
"companyName": 1,
"allEmployees": {
"$reduce": {
"input": "$personnel",
"initialValue": [],
"in": { "$concatArrays": [ "$$value", "$$this" ] }
}
}
}}
])
There is the $setUnion operator available to the aggregation framework. The constraint here is that these are "sets" and all the members are actually "unique" as a "set" requires:
db.collection.aggregate([
{ "$project": {
"companyname": 1,
"allPersonnel": { "$setUnion": [ "$managers", "$employees" ] }
}}
])
So that is cool, as long as all are "unique" and you are in singular arrays.
In the alternate case you can always process with $unwind and $group. The personnel nested array is a simple double unwind
db.collection.aggregate([
{ "$unwind": "$personnel" },
{ "$unwind": "$personnel" },
{ "$group": {
"_id": "$_id",
"companyName": { "$first": "$companyName" },
"allPersonnel": { "$push": { "$personnel" } }
}}
])
Or the same thing as the first one for versions earlier than MongoDB 2.6 where the "set operators" did not exist:
db.collection.aggregate([
{ "$project": {
"type": { "$const": [ "M", "E" ] },
"companyName": 1,
"managers": 1,
"employees": 1
}},
{ "$unwind": "$type" },
{ "$unwind": "$managers" },
{ "$unwind": "$employees" },
{ "$group": {
"_id": "$_id",
"companyName": { "$first": "$companyName" },
"allPersonnel": {
"$addToSet": {
"$cond": [
{ "$eq": [ "$type", "M" ] },
"$managers",
"$employees"
]
}
}
}}
])

MongoDB return two object for every group

I want to get two objects $first and $last after grouping. Is it possible?
Something like this, but this is not working:
{ "$group": {
"_id": "type",
"values": [{
"time": { "$first": "$time" },
"value": { "$first": "$value" }
},
{
"time": { "$last": "$time" },
"value": { "$last": "$value" }
}]
}
}
In order to get the $first and $last values from an array with the aggregation framework, you need to use $unwind first to "de-normalize" the array as individual documents. There is also another trick to put those back in an array.
Assuming a document like this
{
"type": "abc",
"values": [
{ "time": ISODate("2014-06-12T22:35:42.260Z"), "value": "ZZZ" },
{ "time": ISODate("2014-06-12T22:36:45.921Z"), "value": "KKK" },
{ "time": ISODate("2014-06-12T22:37:18.237Z"), "value": "AAA" }
]
}
And assuming that your array is already sorted your would do:
If you do not care about the results being in an array just $unwind and $group:
db.junk.aggregate([
{ "$unwind": "$values" },
{ "$group": {
"_id": "$type",
"ftime": { "$first": "$values.time" },
"fvalue": { "$first": "$values.value" },
"ltime": { "$last": "$values.time" },
"lvalue": { "$last": "$values.value" },
}}
])
For those results in array then there is a trick to it:
db.collection.aggregate([
{ "$unwind": "$values" },
{ "$project": {
"type": 1,
"values": 1,
"indicator": { "$literal": ["first", "last"] }
}},
{ "$group": {
"_id": "$type",
"ftime": { "$first": "$values.time" },
"fvalue": { "$first": "$values.value" },
"ltime": { "$last": "$values.time" },
"lvalue": { "$last": "$values.value" },
"indicator": { "$first": "$indicator" }
}},
{ "$unwind": "$indicator" },
{ "$project": {
"values": {
"time": {
"$cond": [
{ "$eq": [ "$indicator", "first" ] },
"$ftime",
"$ltime"
]
},
"value": {
"$cond": [
{ "$eq": [ "$indicator", "first" ] },
"$fvalue",
"$lvalue"
]
}
}
}},
{ "$group": {
"_id": "$_id",
"values": { "$push": "$values" }
}}
])
If your array is not sorted place an additional $sort stage before the very first $group to make sure your items are in the order you want them to be evaluated by $first and $last. A logical order where is by the "time" field, so:
{ "$sort": { "type": 1, "values.time": 1 } }
The $literal declares an array to identify the values of "first" and "last" which are later "unwound" to create two copies of each grouped document. These are then evaluated using the $cond operator to re-assign to a single field for "values" which is finally push back into an array using $push.
Remember to allways try to $match first in the pipeline in order to reduce the number of documents you are working on to what you reasonable want. You pretty much never want to do this over whole collections, especially when you are using $unwind on arrays.
Just as a final note $literal is introduced/exposed in MongoDB 2.6 and greater versions. For prior versions you can interchange that with the undocumented $const.