MongoDB aggregate: count of elements in an object key - mongodb

I have a MongoDB document like this:
[
{
"_id": {
"$oid": "5ff09030cd55d6d9f378d460"
},
"username": "a value",
"userid": "123456",
"last_access_ts": 1612426253,
"last_access": "2021-2-4 9:10:53",
"anotherid": 12345678910,
"verified_date": "2021-1-2 16:24:32",
"verified_ts": 1609601072,
"group_users": {
"-1001159747589": [
{
"anotherid": 12345678910,
"userid": "123456"
}
],
"-1001143137644": [
{
"anotherid": 12345678910,
"userid": "123456"
}
],
"-1001368608972": [
{
"anotherid": 12345678910,
"userid": "123456"
}
]
},
"registered_access": "2021-1-2 16:24:42",
}
]
I've two questions.
First one: I need to count the elements inside each group_users[key] object, and I'm stuck with this aggregate:
db.collection.aggregate([
{
$match: {
username: "a value"
}
},
{
$project: {
_id: 1,
userid: 1,
"groups": {
"$objectToArray": "$group_users"
}
}
},
{
$unwind: "$groups",
},
])
This aggregate gives me this result:
[
{
"_id": ObjectId("5ff09030cd55d6d9f378d460"),
"groups": {
"k": "-1001449720492",
"v": [
{
"anotherid": 12345678910,
"userid": "123456"
}
]
},
"userid": "123456"
},
{
"_id": ObjectId("5ff09030cd55d6d9f378d460"),
"groups": {
"k": "-1001159747589",
"v": [
{
"anotherid": 12345678910,
"userid": "123456"
}
]
},
"userid": "123456"
},
{
"_id": ObjectId("5ff09030cd55d6d9f378d460"),
"groups": {
"k": "-1001143137644",
"v": [
{
"anotherid": 12345678910,
"userid": "123456"
}
]
},
"userid": "123456"
}
]
How can I count each single groups[v] and then re-group the data? I would like to have a result like:
{
... some user data
"groups": {
"group_key": "count",
"second_group_key": "count",
"third_group_key": "count"
}
}
Is it possible with aggregate or I need to loop in the code?
My second question is always about the group_users. Is possible to have, recursively, the user data inside a group_users object?
I mean, every object inside group_users is an array of users; from this array can I have the user data (maybe with $graphLookup?) using the userid field or the anotherid field?
As a result from this second aggregate I would like to have something like this:
{
... some user data
"groups": {
"group_key": [{"userid": userid, "username": username}],
"second_group_key": [{"userid": userid, "username": username}],
"third_group_key": [{"userid": userid, "username": username}]
}
}
Obviously I can limit this "recursion" to 10 elements per time.
Thanks for any advice.

$objectToArray convert group_users object to array
$let to Bind variable group_arr for use in the specified expression, and returns the result of the expression.
$map to iterate loop of bind variable group_arr, get size of total element of v and return k and v,
$arrayToObject convert returned array from $map to object
db.collection.aggregate([
{ $match: { username: "a value" } },
{
$project: {
_id: 1,
userid: 1,
groups: {
$let: {
vars: { group_arr: { $objectToArray: "$group_users" } },
in: {
$arrayToObject: {
$map: {
input: "$$group_arr",
in: {
k: "$$this.k",
v: { $size: "$$this.v" }
}
}
}
}
}
}
}
}
])
Playground
Second question,
$unwind deconstruct groups array
$lookup with pipeline, match anotherid $in condition and return required fields
$group by _id and reconstruct groups array
$arrayToObject convert groups array to object
db.collection.aggregate([
{ $match: { username: "a value" } },
{
$project: {
_id: 1,
userid: 1,
groups: { $objectToArray: "$group_users" }
}
},
{ $unwind: "$groups" },
{
$lookup: {
from: "collection",
let: { anotherid: "$groups.v.anotherid" },
pipeline: [
{ $match: { $expr: { $in: ["$anotherid", "$$anotherid"] } } },
{
$project: {
_id: 0,
userid: 1,
username: 1
}
}
],
as: "groups.v"
}
},
{
$group: {
_id: "$_id",
groups: { $push: "$groups" },
userid: { $first: "$userid" }
}
},
{ $addFields: { groups: { $arrayToObject: "$groups" } } }
])
Playground

Related

Lodash `countBy` equivalent in MongoDB?

Let's say I have the input docs below:
[
{
"_id": "6225ca4052e7c226e2dd836d",
"data": [
"07",
"07",
"12",
"19",
"07",
"32"
]
},
{
"_id": "6225ca4052e7c226e2dd888f",
"data": [
"99",
"97",
"52",
"99",
"58",
"92"
]
}
]
I want to count the occurrences of every element in data string array per document. In JS, I can use countBy. How can I achieve the same using MongoDB Aggregation Framework?
I have tried to $reduce but MongoDB seems to not support assigning dynamic field to object.
{
$reduce: {
input: '$data',
initialValue: {},
in: { // assign `$$this` with count to `$$value`, but failed! }
}
}
Below is the desired output.
[
{
"_id": "6225ca4052e7c226e2dd836d",
"freqs": {
"12": 1,
"19": 1,
"32": 1,
"07": 3
}
},
{
"_id": "6225ca4052e7c226e2dd888f",
"freqs": {
"52": 1,
"58": 1,
"92": 1,
"97": 1,
"99": 2
}
}
]
db.collection.aggregate([
{
$match: {}
},
{
$unwind: "$data"
},
{
$group: {
_id: "$data",
c: { $sum: 1 },
id: { $first: "$_id" }
}
},
{
$group: {
_id: "$id",
data: { $push: { k: "$_id", v: "$c" } }
}
},
{
$set: {
data: { $arrayToObject: "$data" }
}
}
])
mongoplayground
db.collection.aggregate([
{
$set: {
data: {
$function: {
body: "function(d) {let obj = {}; d.forEach(e => {if(obj[e]==null) { obj[e]=1; }else{ obj[e]++; }}); return obj;}",
args: [
"$data"
],
lang: "js"
}
}
}
}
])
mongoplayground

How do I find "$oid" in MongoDB instead of _id?

I'm trying to do a find search in MongoDB and I just want to return the objectId as a string, but instead I get:
[
{
"_id": {
"$oid": "6114ffebae76283ccfb92c44"
}
}
[
I want:
[
{
"$oid": "6114ffebae76283ccfb92c44"
}
]
Here is my query:
db.person.find(
{ $and: [{"age":20},{"name":"Frank"}] }, {"_id":1}
)
You could apply a $toString to get the id in your desired format.
db.collection.find({},
{
"_id": {
$toString: "$_id"
}
})
However you can't use the $oid as field name because is a reserved keyword.
Example: https://mongoplayground.net/p/y1DsrQ-k4Hj
You can use aggregate:
db.collection.aggregate([
{
$project: {
_id: {
$toString: "$_id"
}
}
}
])
Here you can test it (I set the name of _id to id)
https://mongoplayground.net/p/xdw3PpiwYId
db.person.find({
$and: [
{
"age": 20
},
{
"name": "Frank"
}
]
},
{
"id": {
$toString: "$_id"
},
"_id": 0
})

MongoDB Query - Get frequency map of an array

[
{
"_id": ObjectId("id-1"),
"tests": [
{
"category": "cat1",
"status": "status1",
},
{
"category": "cat1",
"status": "status2",
},
{
"category": "cat2",
"status": "status2",
},
],
},
{
"_id": ObjectId("id-2"),
"tests": [
{
"category": "cat2",
"status": "status1",
},
{
"category": "cat1",
"status": "status1",
},
{
"category": "cat1",
"status": "status2",
},
],
}
]
I have the above collection, my intention is to generate the below result. Please note that the statuses and categories are dynamic.
[
{
"id" : id-1,
"status": {
"status1": count,
"status2": count
},
"category": {
"cat1": count of it,
"cat2": count of it
}
},
{
"id" : id-2,
"status": {
"status1": count of it,
"status2": count of it
},
"category": {
"cat1": count of it,
"cat2": count of it
}
}
]
What I've attempted to do till now, is
Unwinded tests field, then
{
"$group": {
"_id": {
"id": "$_id",
"testStatus": "$tests.status"
},
"val": {
"$sum": 1
}
}
},
{
"$group": {
"_id": {
"id": "$_id.id",
},
"resGroup": {
"$addToSet": {
k: "$_id.testStatus",
v: "$val"
}
}
}
},
{
"$project": {
"_id": "$_id.id",
"statusGroup": {
"$arrayToObject": "$resGroup"
}
}
}
I've done the same for the category field and used $facet to run multiple aggregations.
But, am unable to fetch the result in the required format.
Any help on this will be appreciated.
Thanks
MongoDB Version: 3.4
$map to iterate loop of tests array and convert the object to an array using $objectToArray
$unwind deconstruct tests array
$unwind again deconstruct tests array because it's a nested array
$group by _id, k, and v and get the total count
$group by _id and k and construct the array of the status field in items
$arrayToObject convert items key-value array to an object
$group by _id and construct the array of items
$arrayToObject convert items array to object
db.collection.aggregate([
{
$project: {
tests: {
$map: {
input: "$tests",
in: { $objectToArray: "$$this" }
}
}
}
},
{ $unwind: "$tests" },
{ $unwind: "$tests" },
{
$group: {
_id: {
_id: "$_id",
k: "$tests.k",
v: "$tests.v"
},
count: { $sum: 1 }
}
},
{
$group: {
_id: {
_id: "$_id._id",
k: "$_id.k"
},
items: {
$push: {
k: "$_id.v",
v: "$count"
}
}
}
},
{
$group: {
_id: "$_id._id",
items: {
$push: {
k: "$_id.k",
v: { $arrayToObject: "$items" }
}
}
}
},
{ $project: { items: { $arrayToObject: "$items" } } }
])
Playground

MongoDB $lookup on nested document, limit and count the retrieved data

I would like to get a count of all notifications that aren't read by an User ("A", "B", "C", etc) for each subRoom. Taking into account that it could be millions of notifications documents and hundreds of subrooms elements in Rooms Collections, i need to limit it. For that reason I've limited the $lookup for first 100 elements and then check if that notifications have been read or not by an User. I did it using documents (roomId) in $lookup but I cant do it using subdocuments (subRoom.id).
Notifications Collection is indexed using a Compound of (roomId: 1, timestamp: -1)
Notifications Collection: (id corresponds to notification id and roomId is the link to Rooms collection)
[{
"_id": "XXX",
"id": "1",
"read": ["A", "B", "C"],
"roomId": "c1d87a4c-231d-4cc8-8438-35cf21ed7fc5",
"content": "XXX",
"timestamp": { "$date": "2021-12-31T22:50:53.000Z" }
},{
"_id": "XXX",
"id": "2",
"read": ["C"],
"roomId": "c1d87a4c-231d-4cc8-8438-35cf21ed7fc5",
"content": "XXX",
"timestamp": { "$date": "2021-12-31T22:50:53.000Z" }
},
...
]
Rooms Collection:
[{
"_id": "XXX"
"subRoom": [{
"id": "c1d87a4c-231d-4cc8-8438-35cf21ed7fc5",
"image": "XXX",
"name": "XXX"
}, {
"id": "c2d5081e-0cf1-4e69-937d-be357da1d104",
"image": "XXX",
"name": "XXX"
}, {
"id": "530c2c02-26e8-441c-af39-c5232dfe1f73",
"image": "XXX",
"name": "XXX"
}],
"id": "453a6458-6545-4842-8946-05f49efea216",
"name": "XXX",
},
...
]
Code working using roomId instead subRoom.id:
{ $lookup: {
from: "notifications",
let: { "id": "$id" },
pipeline: [
{ $match: {
$expr:
{ $eq: [ "$roomId", "$$id" ] }
}},
{ $limit: 100},
{ $project: {_id: 0, read: 1}}
],
as: "messages"
}},
{ $project: {_id: 0, id: 1, notRead: {
$size: {
$filter: {
input: "$notifications",
cond: {
$not: {
$in: [
"A",
"$$this.read"
]
}
}
}
}
},
}
Code NOT WORKING using subRoom.id:
{ $lookup: {
from: "notifications",
let: { "id": "$subRoom.id" },
pipeline: [
{ $match: {
$expr:
{ $eq: [ "$roomId", "$$id" ] }
}},
{ $limit: 100},
{ $project: {_id: 0, read: 1}}
],
as: "messages"
}},
{
$addFields: {
items: {
$map: {
input: { $zip: { inputs: ["$subRoom", "$messages"] } },
in: { $mergeObjects: "$$this" },
},
},
},
},
.
. projection
.
Expected Result:
[{
"_id": "XXX"
"subRoom": [{
"id": "c1d87a4c-231d-4cc8-8438-35cf21ed7fc5",
"notRead": 50 //e.g
}, {
"id": "c2d5081e-0cf1-4e69-937d-be357da1d104",
"notRead": 35 //e.g
}, {
"id": "530c2c02-26e8-441c-af39-c5232dfe1f73",
"image": "XXX",
"notRead": 5 //e.g
}],
"id": "453a6458-6545-4842-8946-05f49efea216",
"name": "XXX",
},
...
]
Finally and very importantly, I want an scalable solution that can be done with big data.
Thank you very much in advance.
$unwind deconstruct subRoom array with preserve null and empty array property
$lookup with notification collection using pipeline, let to pass id to pipeline, check condition for roomId and user should not read notification
$group by null and count total unread notifications
$addFields to get count to notifications using $sum
$group by _id and reconstruct the subRoom array with required fields in result
db.rooms.aggregate([
{
$unwind: {
path: "$subRoom",
preserveNullAndEmptyArrays: true
}
},
{
$lookup: {
from: "nitifications",
let: { id: "$subRoom.id" },
pipeline: [
{
$match: {
$and: [
{ $expr: { $eq: ["$$id", "$roomId"] } },
{ read: { $ne: "A" } }
]
}
},
{
$group: {
_id: null,
count: { $sum: 1 }
}
}
],
as: "subRoom.notRead"
}
},
{
$addFields: {
"subRoom.notRead": { $sum: "$subRoom.notRead.count" }
}
},
{
$group: {
_id: "$_id",
name: { $first: "$name" },
id: { $first: "$id" },
subRoom: { $push: "$subRoom" }
}
}
])
Playground
Second option without using $unwind stage,
$lookup with notification collection using pipeline, let to pass id to pipeline, check condition for roomId and user should not read notification
$group by null and count total unread notifications
$map to iterate loop of subRoom array
$filter to iterate loop of return result from lookup notifications count and get current subRoom document
$let to declare a variable n and assign above filtered result to it and return $sum from count
$mergeObjects to merge current object of subRoom and new field notRead
db.rooms.aggregate([
{
$lookup: {
from: "nitifications",
let: { id: "$subRoom.id" },
pipeline: [
{
$match: {
$and: [
{ $expr: { $in: ["$roomId", "$$id"] } },
{ read: { $ne: "A" } }
]
}
},
{
$group: {
_id: "$roomId",
count: { $sum: 1 }
}
}
],
as: "notRead"
}
},
{
$project: {
id: 1,
name: 1,
subRoom: {
$map: {
input: "$subRoom",
as: "s",
in: {
$mergeObjects: [
"$$s",
{
notRead: {
$let: {
vars: {
n: {
$filter: {
input: "$notRead",
cond: { $eq: ["$$this._id", "$$s.id"] }
}
}
},
in: { $sum: "$$n.count" }
}
}
}
]
}
}
}
}
}
])
Playground

Wildcard for key in mongodb query

I have a collection equivalent to:
[
{
"_id": ObjectId("5a934e000102030405000000"),
"sides": {
"0": {
"dist": 100
},
"1": {
"dist": 10
}
}
},
{
"_id": ObjectId("5a934e000102030405000001"),
"sides": {
"0": {
"dist": 100
}
}
}
]
I would like to perform a query that return any documents that has for any key nested in sides has the key dist with a specific value. Something like:
db.collection.find({"sides.*.dist": 10})
Here * acts as a wildcard, any key would be valid in its place.
That would retrieve:
[
{
"_id": ObjectId("5a934e000102030405000000"),
"sides": {
"0": {
"dist": 100
},
"1": {
"dist": 10
}
}
}
]
On the other hand
db.collection.find({"sides.*.dist": 100})
Would retrive both documents.
the following song and dance won't be neccessary if sides field was an array...
db.collection.find(
{
$expr: {
$gt: [{
$size: {
$filter: {
input: { $objectToArray: "$sides" },
as: "x",
cond: { $eq: ["$$x.v.dist", 10] }
}
}
}, 0]
}
})
You could get the matching elements using this
db.collection.aggregate([
{
"$project": {
"sides_array": {//Reshape the sides
"$objectToArray": "$sides"
}
}
},
{//Denormalize to get more than one matches
"$unwind": "$sides_array"
},
{
"$match": {//Condition goes here
"sides_array.v.dist": 10
}
},
{
"$group": {//Group the data back, after unwinding
"_id": "$_id",
"sides": {
"$push": "$sides_array"
}
}
},
{
"$project": {//Reshape the data
"_id": 1,
"sides": {
"$arrayToObject": "$sides"
}
}
}
])