Mongo Aggregation does not analyze all documents - mongodb

I'm trying to get some statistics using Mongo's aggregation Framework but my queries do not seem to work right. So I have a collection of documents, each document having the structure below:
{
"_id" : ObjectId("5e46af306d0f5d63d4de6d0f"),
"homeEmperor" : {
"name" : "Home",
"units" : {
"Mage" : 15,
"Warrior" : 15,
"Swordmaster" : 15,
"Rogue" : 15,
"Warlock" : 15
}
},
"awayEmperor" : {
"name" : "Away",
"units" : {
"Druid" : 15,
"Ranger" : 15,
"Priest" : 15,
"Monk" : 15,
"Dragon" : 15
}
},
"dateCreated" : ISODate("2020-02-06T00:00:00.000Z"),
"winner" : "away",
"battleType" : "mutual",
"turns" : 45,
"_class" : "com.deathstar.Datahouse.domain.mongo.HistoricMongoRecord"
}
So what I'm doing at the moment is using this query to get the number of wins each unit has
db.getCollection('WarHistory').aggregate([
{ $match: { battleType: "mutual" } },
{ $project: {"winners": {$cond: { if: { $eq: [ "$winner", "home" ] }, then: "$homeEmperor.units", else: "$awayEmperor.units"} }} },
{ $project: {"winnerUnits": { $objectToArray: "$winners" }} },
{ $group: { _id: {unitType: "$winnerUnits.k"}} },
{ $unwind: "$_id.unitType" },
{ $group: { _id: {unitType: "$_id.unitType"}, count:{$sum:1} }},
{ $sort : { count : -1 }}
])
and this query to get each unit's participation
db.getCollection('WarHistory').aggregate([
{ $match: { battleType: "mutual" } },
{ $project: { "participants": { $mergeObjects: [ "$homeEmperor.units", "$awayEmperor.units" ] } } },
{ $project: {"participantUnits": { $objectToArray: "$participants" }} },
{ $group: { _id: {unitType: "$participantUnits.k"}} },
{ $unwind: "$_id.unitType" },
{ $group: { _id: {unitType: "$_id.unitType"}, count:{$sum:1} }},
{ $sort : { count : -1 }}
])
The output of both queries is as shown below which is the way I want it:
{
"_id" : {
"unitType" : "Pirate"
},
"count" : 1331.0
}
After that I divide them and so on. I also have a Java service which produces these documents.
The problem is that while at first the queries seem to work fine, after some point they seem to stop counting all documents, so I see the "Count" counter in the Collection increasing but the query results remain exactly the same.
I've checked and the documents continue to have the same structure along the way.
I also tried the "allowDiskUse:true" parameter in case the problem was the memory capacity(I have 16GB RAM), but it really made no difference.
Can anyone please confirm that the queries above can do what I want them to?
Any help would be really appreciated! Thanks for your time!

Related

writing aggregate MongoDB query to calculate field count ids

I'm writing an aggregate query for the following records and output.
Data:
[
{
"_id" : ObjectId("5f3b2626927b18001db86884"),
"collections" : [
Art, Craft
]
},{
"_id" : ObjectId("5f3b2626927b18001db86885"),
"collections" : [
Craft
]
},{
"_id" : ObjectId("5f3b2626927b18001db86886"),
"collections" : [
Apex, Art
]
},
...
]
Expected Output:
count of collections id
{
Art : 2,
Craft : 2,
Apex : 1
}
Right now, we are looping through the collection to calculate count for each collections as the desired output, but it is low in performance because this collection is consists of 10,000 of records.
So, I was thinking to build an aggregate query and if someone can help me to start or point towards a right direction that would be really appreciated. Thank you.
$unwind
$group
$group
$replaceRoot
db.collection.aggregate([
{
$unwind: "$collections"
},
{
"$group": {
"_id": "$collections",
"v": {
"$sum": 1
}
}
},
{
"$group": {
"_id": null,
"collections": {
"$push": {
$arrayToObject: [
[ { "k": "$$ROOT._id", "v": "$$ROOT.v" } ]
]
}
}
}
},
{
$replaceRoot: {
newRoot: {
$mergeObjects: "$collections"
}
}
}
])
mongoplayground
I have figured a solution after checking for a while.
db.getCollection("collectionName").aggregate(
[
// get all the records with at least one collection name
{
$match: {
"collections.0": { $exists: true }
}
},
// populate the collection record
{
$lookup: {
from: "from_collection",
localField: "localField",
foreignField: "foreignField",
as: "collections"
}
},
// unwind
{ $unwind: "$collections" },
// group by the collections._id
{ $group: { _id: "$collections._id", collections: { $push: "$$ROOT.ID" } } },
// project with collection contains _id, and count
{
$project : {
collections: "$collections",
count: { $size: "$collections" }
}
}
]
).toArray();
output:
[
{
"_id" : ObjectId("61c4c42d68579f00311dd3e1"),
"collections" : [
"015151",
"015152",
"015153"
],
"count" : 3.0
},
{
"_id" : ObjectId("615f38016f40710033699939"),
"collections" : [
"014871"
],
"count" : 1.0
},
{
"_id" : ObjectId("611fed5ee0d12c00337cb009"),
"collections" : [
"014788",
"014786",
"014789",
"014787",
"014884",
"014893",
"014967",
"014968",
"015016",
"015017"
],
"count" : 10.0
}
...
]

Count if a value exists inside the array of objects [ MongoDB ]

I am developing a simple chat application with MongoDB and got stuck into a situation.
My document in database is as
{
"_id" : ObjectId("605a217ed8168f4c262f4782"),
"message" : "Hi, This is a test message",
"created" : ISODate("2021-03-23T17:12:30.000Z"),
"user" : {
"_id" : ObjectId("5977af7df1d8cc4623283b14"),
"name" : "Sender Of Message"
},
"recipients" : [
{
"_id" : ObjectId("5977af7df1d8cc4623283b14"),
"time" : ISODate("2021-03-23T17:12:30.000Z")
},
{
"_id" : ObjectId("5df50a5eaa0e3c3104006101"),
"time" : ISODate("2021-03-23T17:12:35.000Z")
}
],
"target" : {
"_id" : ObjectId("5df50a5eaa0e3c3104006101"),
"name" : "Target Person"
},
"status" : 1
}
When I try to get the last message with the unread count of the user I am always getting 1
Here is the query that I tried on.
db.collection.aggregate([
{ $match: { 'target._id': ObjectId('5df50a5eaa0e3c3104006101'), status: 1 } },
{ $sort: { _id: -1 } },
{
$group: {
_id: '$user._id',
doc: { $first: '$$ROOT' },
unread: {
$sum: {
$cond: {
if: { $ne: [ ObjectId('5df50a5eaa0e3c3104006101'), '$recipients._id' ] },
then: 1,
else: 0
}
}
}
}
}
])
If the collection contains even just the one document above, it is supposed to give 0 as the object inside the recipients array already contains the _id as ObjectId('5df50a5eaa0e3c3104006101'), but I'm getting 1 for the unread count. Any help?
Here is the output that I get from the query
{
"_id" : ObjectId("5977af7df1d8cc4623283b14"),
"doc" : {
"_id" : ObjectId("605a217ed8168f4c262f4782"),
"message" : "Hi, This is a test message",
"created" : ISODate("2021-03-23T17:12:30.000Z"),
"user" : {
"_id" : ObjectId("5977af7df1d8cc4623283b14"),
"name" : "Sender Of Message"
},
"recipients" : [
{
"_id" : ObjectId("5977af7df1d8cc4623283b14"),
"time" : ISODate("2021-03-23T17:12:30.000Z")
},
{
"_id" : ObjectId("5df50a5eaa0e3c3104006101"),
"time" : ISODate("2021-03-23T17:12:35.000Z")
}
],
"target" : {
"_id" : ObjectId("5df50a5eaa0e3c3104006101"),
"name" : "Target Person"
},
"status" : 1
},
"unread" : 1.0
}
I know why its showing with the count as 1
The array recipients contains an object with _id as ObjectId("5977af7df1d8cc4623283b14") inside it, so its a non matching condition. Which is causing the if condition to be satisfied and produce a value 1.
But I need to figure out how to query it to get the actual value.
Please note that I cant use $push operator on recipients array as it might have greater amount of object ( maybe in future )
Thanks for the support, but I have found the answer by myself.
Here is my approch to get the data as per the requirement.
Instead of searching for the records within the array what I did is
Filtered the data array to the _id that I don't need, so the array will have exactly one document or else it will be empty.
When taking the negation of the condition. ie, when there is one value in the array I need the counter to be 0 or else it should be 1
So I used the $size to check the array's size and $filter to filter out the other _ids and then used $sum to increment the counter as required.
db.collection.aggregate([
{ $match: { 'target._id': ObjectId('5df50a5eaa0e3c3104006101'), status: 1 } },
{ $sort: { _id: -1 } },
{
$group: {
_id: '$user._id',
doc: { $first: '$$ROOT' },
unread: {
$sum: {
$cond:{
if: {
$size: {
$filter: {
input: '$recipients',
as: 'item',
cond: { $eq: [ ObjectId('5df50a5eaa0e3c3104006101'), '$$item._id' ] }
}
}
},
then: 0,
else: 1
}
}
}
}
}
])
Try to Use like this:
db.getCollection('test').aggregate([
{ $match: { 'target._id': ObjectId('5df50a5eaa0e3c3104006101'), status: 1
} },
{ $sort: { _id: -1 } },
{ $unwind: { path: "$recipients", preserveNullAndEmptyArrays: true } },
{
$group: {
_id: '$user._id',
doc: { $first: '$$ROOT' },
unread: {
$sum: {
$cond: {
if: { $ne: ['$recipients._id',
ObjectId('5df50a5eaa0e3c3104006101') ] },
then: 1,
else: 0
}
}
}
}
}
])

How can i count total documents and also grouped counts simultanously in mongodb aggregation?

I have a dataset in mongodb collection named visitorsSession like
{ip : 192.2.1.1,country : 'US', type : 'Visitors',date : '2019-12-15T00:00:00.359Z'},
{ip : 192.3.1.8,country : 'UK', type : 'Visitors',date : '2019-12-15T00:00:00.359Z'},
{ip : 192.5.1.4,country : 'UK', type : 'Visitors',date : '2019-12-15T00:00:00.359Z'},
{ip : 192.8.1.7,country : 'US', type : 'Visitors',date : '2019-12-15T00:00:00.359Z'},
{ip : 192.1.1.3,country : 'US', type : 'Visitors',date : '2019-12-15T00:00:00.359Z'}
I am using this mongodb aggregation
[{$match: {
nsp : "/hrm.sbtjapan.com",
creationDate : {
$gte: "2019-12-15T00:00:00.359Z",
$lte: "2019-12-20T23:00:00.359Z"
},
type : "Visitors"
}}, {$group: {
_id : "$country",
totalSessions : {
$sum: 1
}
}}, {$project: {
_id : 0,
country : "$_id",
totalSessions : 1
}}, {$sort: {
country: -1
}}]
using above aggregation i am getting results like this
[{country : 'US',totalSessions : 3},{country : 'UK',totalSessions : 2}]
But i also total visitors also along with result like totalVisitors : 5
How can i do this in mongodb aggregation ?
You can use $facet aggregation stage to calculate total visitors as well as visitors by country in a single pass:
db.visitorsSession.aggregate( [
{
$match: {
nsp : "/hrm.sbtjapan.com",
creationDate : {
$gte: "2019-12-15T00:00:00.359Z",
$lte: "2019-12-20T23:00:00.359Z"
},
type : "Visitors"
}
},
{
$facet: {
totalVisitors: [
{
$count: "count"
}
],
countrySessions: [
{
$group: {
_id : "$country",
sessions : { $sum: 1 }
}
},
{
$project: {
country: "$_id",
_id: 0,
sessions: 1
}
}
],
}
},
{
$addFields: {
totalVisitors: { $arrayElemAt: [ "$totalVisitors.count" , 0 ] },
}
}
] )
The output:
{
"totalVisitors" : 5,
"countrySessions" : [
{
"sessions" : 2,
"country" : "UK"
},
{
"sessions" : 3,
"country" : "US"
}
]
}
You could be better off with two queries to do this.
To save the two db round trips following aggregation can be used which IMO is kinda verbose (and might be little expensive if documents are very large) to just count the documents.
Idea: Is to have a $group at the top to count documents and preserve the original documents using $push and $$ROOT. And then before other matches/filter ops $unwind the created array of original docs.
db.collection.aggregate([
{
$group: {
_id: null,
docsCount: {
$sum: 1
},
originals: {
$push: "$$ROOT"
}
}
},
{
$unwind: "$originals"
},
{ $match: "..." }, //and other stages on `originals` which contains the source documents
{
$group: {
_id: "$originals.country",
totalSessions: {
$sum: 1
},
totalVisitors: {
$first: "$docsCount"
}
}
}
]);
Sample O/P: Playground Link
[
{
"_id": "UK",
"totalSessions": 2,
"totalVisitors": 5
},
{
"_id": "US",
"totalSessions": 3,
"totalVisitors": 5
}
]

MongoDB two groups Aggregate

Aggregation operations process data records and return computed results. Aggregation operations group values from multiple documents together, and can perform a variety of operations on the grouped data to return a single result. MongoDB provides three ways to perform aggregation: the aggregation pipeline, the map-reduce function, and single purpose aggregation methods.
I would like to transform that :
{
"_id" : ObjectId("5836b919885383034437d4a7"),
"Identificador" : "G-3474",
"Miembros" : [
{
"_id" : ObjectId("5836b916885383034437d238"),
"Nombre" : "Pilar",
"Email" : "pcarrillocasa#gmail.es",
"Edad" : 24,
"País" : "España",
"Tipo" : "Usuario individual",
"Apellidos" : "Carrillo Casa",
"Teléfono" : 637567234,
"Ciudad" : "Santander",
"Identificador" : "U-3486",
"Información_creación" : {
"Fecha_creación" : {
"Mes" : 4,
"Día" : 22,
"Año" : 2016
},
"Hora_creación" : {
"Hora" : 15,
"Minutos" : 34,
"Segundos" : 20
}
}
}
}
into that
{
"Nombre_Grupo" : "Amigo invisible"
"Ciudades" : [
{
"Ciudad" : "Madrid",
"Miembros": 30
},
{
"Ciudad" : "Almería",
"Miembros": 10
}
{
"Ciudad" : "Badajoz",
"Miembros": 20
}
]
}
with MongoDB.
I tried with that:
db.Grupos_usuarios.aggregate([
{ $group: { _id: "$Nombre_Grupo",total: { $sum: "$amount" } },
$group: { _id: "$Ciudad",total: { $sum: "$amount" } } }
])
but I could not get what I needed.
May somebody help me to know what I am doing bad?
The following aggregation gets the output you are looking for.
The $unwind stage deconstructs an array field from the input documents to output a document for each element. These documents are used to group by the Miembros.Ciudad and get the total Miembros for each Ciudad. In the second group stage we Pivot data to get all the Ciudades from the previous grouping into an array. The last $project is for formatting the output.
db.test.aggregate( [
{
$unwind: "$Miembros"
},
{
$group: {
_id: "$Miembros.Ciudad",
total: { $sum: 1 }
}
},
{
$group: {
_id: "Amigo invisible",
Ciudades: { $push: { Ciudad: "$_id", Miembros: "$total"} }
}
},
{
$project: {
Nombre_Grupo: "$_id",
Ciudades: 1,
_id: 0
}
}
] )

Cant find duplicate values for array part in mongodb

db.school.find({ "merchant" : "cc8c0421-e7fc-464d-9e1d-37e168b216c3" })
this is an example document from school collection of that query:
{
"_id" : ObjectId("57fafasf2323232323232f57682cd42"),
"status" : "wait",
"merchant" : "cc8c0421-e7fc-464d-9e1d-37e168b216c3",
"isValid" : false,
"fields" : { "schoolid" : {
"value" : "2323232",
"detail" : {
"revisedBy" : "teacher",
"revisionDate" : ISODate("2015-06-24T09:22:44.288+0000")
},
"history" : [
]
}}
}
I want to see which has duplcate schoolid. SO i do this:
db.school.aggregate([
{$match:{ "merchant" : "cc8c0421-e7fc-464d-9e1d-37e168b216c3"
{ $group: {
_id: { fields.schoolid.value: "$fields.schoolid.value" },
count: { $sum: 1 }
} },
{ $match: {
count: { $gte: 2 }
} },
{ $sort : { count : -1} },
{ $limit : 10 }
]);
but it gives error.
a lot of errors for a lot of lines
i tried to do like this
_id: { "fields.schoolid.value": "$fields.schoolid.value" },
or
_id: { 'fields.schoolid.value': "$'fields.schoolid.value'" },
but did not work. ow can i use it?
According to the document you provided, there is no fields field, so the group stage can't work. Your query should be :
db.school.aggregate([
{ $match: { "merchant" : "cc8c0421-e7fc-464d-9e1d-37e168b216c3"}},
{ $group: {
_id: { value: "$fields.schoolid.value" },
count: { $sum: 1 }
} },
{ $match: {
count: { $gte: 2 }
} },
{ $sort : { count : -1} },
{ $limit : 10 }
]);
Also note that fields.schoolid.value is not a valid fieldname, you need to enclode it in "" or to remove the "."