MongoDB order by a sum on a subset - mongodb

I have the following collection:
error_reports
[
{
"_id":{
"$oid":"5184de1261"
},
"date":"29/04/2013",
"errors":[
{
"_id":"10",
"failures":2,
"alerts":1,
},
{
"_id":"11",
"failures":7,
"alerts":4,
}
]
},
{
"_id":{
"$oid":"5184de1262"
},
"date":"30/04/2013",
"errors":[
{
"_id":"15",
"failures":3,
"alerts":2,
},
{
"_id":"16",
"failures":9,
"alerts":1,
}
]
}
]
Is it possible to retrieve the list of documents with failures and alerts sum sorted by failures in descending order? I am new to mongodb, I have been searching for 2 days but I can't figure out what is the proper query...
I tried something like this :
db.error_reports.aggregate(
{ $sort : { failures: -1} },
{ $group:
{ _id: "$_id",
failures: { "$sum": "$errors.failures" }
}
}
);
But it didn't work, I think it is because of the $sum: $errors.failures thing, I would like to sum this attribute on every item of the day_hours subcollection but I don't know of to do this in a query...

You were very close with your attempt. The only thing missing is the $unwind aggregation operator. $unwind basically splits each document out based on a sub-document. So before you group the failures and alerts, you unwind the errors, like so:
db.error_reports.aggregate(
{ $unwind : '$errors' },
{ $group : {
_id : '$_id',
'failures' : { $sum : '$errors.failures' },
'alerts' : { $sum : '$errors.alerts' }
} },
{ $sort : { 'failures': -1 } }
);
Which gives you the follow result:
{
"result" : [
{
"_id" : ObjectId("5184de1262"),
"failures" : 12,
"alerts" : 3
},
{
"_id" : ObjectId("5184de1261"),
"failures" : 9,
"alerts" : 5
}
],
"ok" : 1
}

Related

Count if a value exists inside the array of objects [ MongoDB ]

I am developing a simple chat application with MongoDB and got stuck into a situation.
My document in database is as
{
"_id" : ObjectId("605a217ed8168f4c262f4782"),
"message" : "Hi, This is a test message",
"created" : ISODate("2021-03-23T17:12:30.000Z"),
"user" : {
"_id" : ObjectId("5977af7df1d8cc4623283b14"),
"name" : "Sender Of Message"
},
"recipients" : [
{
"_id" : ObjectId("5977af7df1d8cc4623283b14"),
"time" : ISODate("2021-03-23T17:12:30.000Z")
},
{
"_id" : ObjectId("5df50a5eaa0e3c3104006101"),
"time" : ISODate("2021-03-23T17:12:35.000Z")
}
],
"target" : {
"_id" : ObjectId("5df50a5eaa0e3c3104006101"),
"name" : "Target Person"
},
"status" : 1
}
When I try to get the last message with the unread count of the user I am always getting 1
Here is the query that I tried on.
db.collection.aggregate([
{ $match: { 'target._id': ObjectId('5df50a5eaa0e3c3104006101'), status: 1 } },
{ $sort: { _id: -1 } },
{
$group: {
_id: '$user._id',
doc: { $first: '$$ROOT' },
unread: {
$sum: {
$cond: {
if: { $ne: [ ObjectId('5df50a5eaa0e3c3104006101'), '$recipients._id' ] },
then: 1,
else: 0
}
}
}
}
}
])
If the collection contains even just the one document above, it is supposed to give 0 as the object inside the recipients array already contains the _id as ObjectId('5df50a5eaa0e3c3104006101'), but I'm getting 1 for the unread count. Any help?
Here is the output that I get from the query
{
"_id" : ObjectId("5977af7df1d8cc4623283b14"),
"doc" : {
"_id" : ObjectId("605a217ed8168f4c262f4782"),
"message" : "Hi, This is a test message",
"created" : ISODate("2021-03-23T17:12:30.000Z"),
"user" : {
"_id" : ObjectId("5977af7df1d8cc4623283b14"),
"name" : "Sender Of Message"
},
"recipients" : [
{
"_id" : ObjectId("5977af7df1d8cc4623283b14"),
"time" : ISODate("2021-03-23T17:12:30.000Z")
},
{
"_id" : ObjectId("5df50a5eaa0e3c3104006101"),
"time" : ISODate("2021-03-23T17:12:35.000Z")
}
],
"target" : {
"_id" : ObjectId("5df50a5eaa0e3c3104006101"),
"name" : "Target Person"
},
"status" : 1
},
"unread" : 1.0
}
I know why its showing with the count as 1
The array recipients contains an object with _id as ObjectId("5977af7df1d8cc4623283b14") inside it, so its a non matching condition. Which is causing the if condition to be satisfied and produce a value 1.
But I need to figure out how to query it to get the actual value.
Please note that I cant use $push operator on recipients array as it might have greater amount of object ( maybe in future )
Thanks for the support, but I have found the answer by myself.
Here is my approch to get the data as per the requirement.
Instead of searching for the records within the array what I did is
Filtered the data array to the _id that I don't need, so the array will have exactly one document or else it will be empty.
When taking the negation of the condition. ie, when there is one value in the array I need the counter to be 0 or else it should be 1
So I used the $size to check the array's size and $filter to filter out the other _ids and then used $sum to increment the counter as required.
db.collection.aggregate([
{ $match: { 'target._id': ObjectId('5df50a5eaa0e3c3104006101'), status: 1 } },
{ $sort: { _id: -1 } },
{
$group: {
_id: '$user._id',
doc: { $first: '$$ROOT' },
unread: {
$sum: {
$cond:{
if: {
$size: {
$filter: {
input: '$recipients',
as: 'item',
cond: { $eq: [ ObjectId('5df50a5eaa0e3c3104006101'), '$$item._id' ] }
}
}
},
then: 0,
else: 1
}
}
}
}
}
])
Try to Use like this:
db.getCollection('test').aggregate([
{ $match: { 'target._id': ObjectId('5df50a5eaa0e3c3104006101'), status: 1
} },
{ $sort: { _id: -1 } },
{ $unwind: { path: "$recipients", preserveNullAndEmptyArrays: true } },
{
$group: {
_id: '$user._id',
doc: { $first: '$$ROOT' },
unread: {
$sum: {
$cond: {
if: { $ne: ['$recipients._id',
ObjectId('5df50a5eaa0e3c3104006101') ] },
then: 1,
else: 0
}
}
}
}
}
])

mongo count rows from an array of provided data

I have collection like this:
{
"_id" : ObjectId("4d663451d1e7242c4b68e000"),
"topic" : "abc",
"subLevel" : {
"id" : 1
}
}
{
"_id" : ObjectId("4d6634514cb5cb2c4b69e000"),
"topic" : "bce",
"subLevel" : {
"id" : 1
}
}
{
"_id" : ObjectId("4d6634514cb5cb2c4b70e000"),
"topic" : "bec",
"subLevel" : {
"id" : 2
}
}
{
"_id" : ObjectId("4d6634514cb5cb2c4b70e000"),
"topic" : "vvv",
"subLevel" : {
"id" : 3
}
}
and I need to count how many documents exist for provided subLevel.id list, for example if I provide 1 and 2 it should show me that for 1 we have 2 documents and for 2 only 1 document and simply omit document where subLevel.id is 3 as it's not in the list of id's.
I tried to do it with a aggregate
db.getCollection('products').aggregate( [
{ $project:
{ "has_sublevel" : {$in: [ "subLevel.id", [1 , 2 ]]} }
},
{ $group: { _id : "$subLevel.id", count: { $sum: 1 } } }
] )
but result is
{
_id : null,
count: 4
}
how can I do it, thanks in advance!
If transform it to SQL which I familiar more, query should look like this:
select subLevelId, count(id) FROM products where subLevelId in (1,2) group by subLevelId
If I've understand correctly, you are so so close, check this query:
First use $match to get only documents whose subLevel.id is 1 or 2.
Then, as you have done, $group by the id and sum to get total count:
db.collection.aggregate([
{
"$match": { "subLevel.id": { "$in": [ 1, 2 ] } }
},
{
"$group": { "_id": "$subLevel.id", "count": { "$sum": 1 } }
}
])
Example here
You will need this:
db.products.aggregate([ {$match:{ "subLevel.id":{ $in:[1,2] } }} , {$group:{ _id:"$subLevel.id" , count:{$sum:1} } } ])
which is same like:
db.products.aggregate([ {$match:{ $or:[{"subLevel.id":1},{"subLevel.id":2} ]}} , {$group:{ _id:"$subLevel.id" , count:{$sum:1} } } ])
You need to push the respective docs into their respective arrays and then get their sizes:
db.collection.aggregate([
{
"$match": {
"subLevel.id": {
$in: [
1,
2
]
}
}
},
{
$group: {
"_id": "$subLevel.id",
ids: {
$push: "$_id"
}
}
},
{
$project: {
_id: false,
ids: {
$size: "$ids"
}
}
}
])
Playgroud: https://mongoplayground.net/p/J1ei37l1K5-

MongoDB two groups Aggregate

Aggregation operations process data records and return computed results. Aggregation operations group values from multiple documents together, and can perform a variety of operations on the grouped data to return a single result. MongoDB provides three ways to perform aggregation: the aggregation pipeline, the map-reduce function, and single purpose aggregation methods.
I would like to transform that :
{
"_id" : ObjectId("5836b919885383034437d4a7"),
"Identificador" : "G-3474",
"Miembros" : [
{
"_id" : ObjectId("5836b916885383034437d238"),
"Nombre" : "Pilar",
"Email" : "pcarrillocasa#gmail.es",
"Edad" : 24,
"País" : "España",
"Tipo" : "Usuario individual",
"Apellidos" : "Carrillo Casa",
"Teléfono" : 637567234,
"Ciudad" : "Santander",
"Identificador" : "U-3486",
"Información_creación" : {
"Fecha_creación" : {
"Mes" : 4,
"Día" : 22,
"Año" : 2016
},
"Hora_creación" : {
"Hora" : 15,
"Minutos" : 34,
"Segundos" : 20
}
}
}
}
into that
{
"Nombre_Grupo" : "Amigo invisible"
"Ciudades" : [
{
"Ciudad" : "Madrid",
"Miembros": 30
},
{
"Ciudad" : "Almería",
"Miembros": 10
}
{
"Ciudad" : "Badajoz",
"Miembros": 20
}
]
}
with MongoDB.
I tried with that:
db.Grupos_usuarios.aggregate([
{ $group: { _id: "$Nombre_Grupo",total: { $sum: "$amount" } },
$group: { _id: "$Ciudad",total: { $sum: "$amount" } } }
])
but I could not get what I needed.
May somebody help me to know what I am doing bad?
The following aggregation gets the output you are looking for.
The $unwind stage deconstructs an array field from the input documents to output a document for each element. These documents are used to group by the Miembros.Ciudad and get the total Miembros for each Ciudad. In the second group stage we Pivot data to get all the Ciudades from the previous grouping into an array. The last $project is for formatting the output.
db.test.aggregate( [
{
$unwind: "$Miembros"
},
{
$group: {
_id: "$Miembros.Ciudad",
total: { $sum: 1 }
}
},
{
$group: {
_id: "Amigo invisible",
Ciudades: { $push: { Ciudad: "$_id", Miembros: "$total"} }
}
},
{
$project: {
Nombre_Grupo: "$_id",
Ciudades: 1,
_id: 0
}
}
] )

Mongodb splitting aggregation result

I'm currently trying to split an aggregation result in two differents arrays using only mongodb.
My main goal is to create two subset of user with the same distribution regarding the number of interactions that they have made. For this I'm currently making this request:
db.getCollection('Interaction').aggregate([
{ $group : { _id : "$userId", count: { $sum: 1 }}},
{ $sort : { count : -1 }},
{ $group : { _id :{$mod : [_rand() * 2, 2]}, ids : { $push: "$_id"}}}
}
My main issue actualy is that the _rand() function is called only once during the aggregation execution to I only have all my result in a single array.
Also, a random distribution is not so good. Is there a way to use the index of each result ?
Edit 1 :
After #dnickless answer I still got an issue on distribution in the groupBy part. Ideally I would like to do something like this
db.getCollection('Interaction').aggregate([
{ $group : { _id : "$userId", count: { $sum: 1 }}},
{ $sort : { count : -1 }},
{ $bucket: {
groupBy: { $mod: [ { $indexOfArray : ??? }, 2 ] },
boundaries: [ 0, 1 ],
default: 2,
output: {
"users": { $push: "$_id"}
}
}
}
],
{ allowDiskUse: true })
That could split even index and odd index into two separated array. But I would like to apply the $indexOfArray on the current aggregation result.
To give you more context here is my Interaction object model :
{ "_id" : ObjectId("5af01..."), "name" : "WATCH", "date" : ISODate("2018-05-07T09:32:53.219Z") }
Without the bucket part I have this result :
{ "_id" : "5b1e7f...", "count" : 43.0 }
{ "_id" : "5b1e75...", "count" : 41.0 }
{ "_id" : "5b1e7a...", "count" : 40.0 }
...
I would like my answer to look like this :
{
{ "_id" : 0, "users" : [ "5b1e7f...", "5b1e7a...", ... ] }, // even index results
{ "_id" : 1, "users" : [ "5b1e75...", ... ] } // odd index results
}
My end goal is to split my users in 2 groups with evenly distributed numbers of interactions.
Edit 2 :
Finally found a solution to resolve my problem :
db.getCollection('Interaction').aggregate([
{ $group : { _id : "$userId", count: { $sum: 1 }}},
{ $sort : { count : -1 }},
{ $group : { _id : "whatever" , user : { $push : { _id : "$_id" , count : "$count"}}}},
{ $unwind : { path : "$user" , "includeArrayIndex" : "rank"}},
{ $bucket: {
groupBy: { $mod: [ "$rank" , 2 ] },
boundaries: [ 0, 1 ],
default: 2,
output: {
"users": { $push: "$user._id"}
}
}
}
],
{ allowDiskUse: true })
Probably not the most optimized solution at all, but still do the job :)
If you have any advise to improve it I'm still interested in.
I don't fuly understand what exactly you are trying to achieve here without seeing some sample input and output. However, have you tried using $bucketAuto? Something like this:
db.getCollection('Interaction').aggregate([
{ $group : { _id : "$userId", count: { $sum: 1 }}},
{ $bucketAuto : {
groupBy : "$count",
buckets : 2, // number of buckets goes here
output : {
ids : { $push : "$id" }
}
}
}])
If you want to go more sophisticated regarding the distribution aspect you could perhaps try something like this which would throw all even counts into one pot and all odd ones into another:
$bucket: {
groupBy: { $mod: [ "$count", 2 ] },
boundaries: [ 0, 1 ],
default: 2,
output: {
"docs": { $push: "$$ROOT" }
}
}
Depending on the type of your userId field you could perhaps come up with a more "random" distribution.
Lastly, I am not sure what exactly you mean by
"Is there a way to use the index of each result ?"
Perhaps something like $size, $arrayElemAt and/or $indexOfArray...?
Alternatively, you could perhaps try to $slice the sorted array into two equally sized parts (using $size $divided by 2), then $reverseArray one of them and then $zip both arrays up again which should result in something like when you shuffle a deck of playing cards. After that, you would need to flatten the nested array into a single one again (using $reduce and $concatArrays or so) and then slice the array again in two parts which should be what you are looking for if I am not too tired by now to think through the statistical parts here.

Cant find duplicate values for array part in mongodb

db.school.find({ "merchant" : "cc8c0421-e7fc-464d-9e1d-37e168b216c3" })
this is an example document from school collection of that query:
{
"_id" : ObjectId("57fafasf2323232323232f57682cd42"),
"status" : "wait",
"merchant" : "cc8c0421-e7fc-464d-9e1d-37e168b216c3",
"isValid" : false,
"fields" : { "schoolid" : {
"value" : "2323232",
"detail" : {
"revisedBy" : "teacher",
"revisionDate" : ISODate("2015-06-24T09:22:44.288+0000")
},
"history" : [
]
}}
}
I want to see which has duplcate schoolid. SO i do this:
db.school.aggregate([
{$match:{ "merchant" : "cc8c0421-e7fc-464d-9e1d-37e168b216c3"
{ $group: {
_id: { fields.schoolid.value: "$fields.schoolid.value" },
count: { $sum: 1 }
} },
{ $match: {
count: { $gte: 2 }
} },
{ $sort : { count : -1} },
{ $limit : 10 }
]);
but it gives error.
a lot of errors for a lot of lines
i tried to do like this
_id: { "fields.schoolid.value": "$fields.schoolid.value" },
or
_id: { 'fields.schoolid.value': "$'fields.schoolid.value'" },
but did not work. ow can i use it?
According to the document you provided, there is no fields field, so the group stage can't work. Your query should be :
db.school.aggregate([
{ $match: { "merchant" : "cc8c0421-e7fc-464d-9e1d-37e168b216c3"}},
{ $group: {
_id: { value: "$fields.schoolid.value" },
count: { $sum: 1 }
} },
{ $match: {
count: { $gte: 2 }
} },
{ $sort : { count : -1} },
{ $limit : 10 }
]);
Also note that fields.schoolid.value is not a valid fieldname, you need to enclode it in "" or to remove the "."