MongoDB count in array very slow - mongodb

My data is as follows:
{
"timestamp":1467349392 ,
"alert_tag":[
"alert_source: WS2",
"alert_severity:warning",
"alert_object:Sensor-840",
"alert_type_id:2",
"alert_status:resovled"
],
"alert_info":{
"t":1
},
"server_created_timestamp":NumberLong("1467349309392"),
"alert_id":"20"
}
I have about two million records in the collection. I am trying to count the number of unique alert_status, alert_object, and alert_source values that match the query. The following is my aggregation pipeline (match, unwind, group, sort, project, group), and it takes about 40s to complete:
{
"$match":{
"alert_tag":{
"$in":[
"alert_status:pending"
]
}
}
},
{
"$unwind":"$alert_tag"
},
{
"$group":{
"_id":"$alert_tag",
"count":{
"$sum":1
}
}
},
{
"$sort":{
"_id":1
}
},
{
"$project":{
"tmp":{
"alert":"$_id",
"count":"$count"
}
}
},
{
"$group":{
"_id":null,
"total":{
"$sum":1
},
"data":{
"$addToSet":"$tmp"
}
}
}
How can I improve the performance? The alert_tag field has already been indexed.

Related

$size operator is inserted as static string in mongodb upsert

In MongoDB, I want to have a document similar to this:
{
"project": "myproject",
"tasks": ["mytask"],
"tasksCount": 1
}
where tasksCount is the length of the tasks array. I use this upsert query as suggested here:
db.tasks.updateOne(
{ project: "myproject" },
{
$addToSet: { tasks: "mytask" },
$set: { tasksCount: { $size: "$tasks" } }
},
{ upsert: true }
)
But the actual document inserted, looks like this:
{
"project": "myproject",
"tasks": ["mytask"],
"tasksCount": {
"$size": "$tasks"
}
}
where the tasksCount is simply the object I pass in, instead of the actual size.
I am using mongo v5.0.8.
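As @turvishal mentioned in the comments, $size is an aggregation operator, so inside a regular update document it is stored as a literal object instead of being evaluated. Here is an option using an update with an aggregation pipeline: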
db.collection.update({
project: "myproject"
},
[
{
$project: {
tasks: {
$setUnion: [
"$tasks",
[
"mytask2"
]
]
}
}
},
{
$addFields: {
tasksCount: {
$size: "$tasks"
}
}
}
],
{
upsert: true,
multi: true
})
Explained:
Match all documents with project: "myproject".
Use $project with $setUnion (the pipeline equivalent of $addToSet) to add the task to the tasks array.
Set the tasksCount field to the size of the tasks array.
upsert: true together with multi: true performs the operation on all matching documents and inserts a new document if there is no match.
Playground

Mongodb aggregate - using $and and $eq inside a $match operator

I wanted to match and get the documents I wanted based on 2 conditions
data
But it seems like the $eq inside $match isn't doing the right job. I checked the docs many times but can't find what exactly the problem is.
[
{
"episode_title":"Episode Title 1",
"episode_number":"01",
"unique_id":"5D66EEDA9BF055BE80CB997EFD571636C02ED050",
"series_id":1,
"comments_hash":"CBBEA21FE1E4",
"quality":1080,
"thumbnail_url":null,
"thumbnail_version":null,
"original_thumbnail":null,
"watched_by":[
1820242622
]
},
{
"episode_title":"Episode Title 2",
"episode_number":"02",
"unique_id":"12DC0F2B6D1223D8FF6C10189B6CF6DEB4BBA60B",
"series_id":1,
"comments_hash":"EEDA9BF055BE80C",
"quality":720,
"thumbnail_url":null,
"thumbnail_version":null,
"original_thumbnail":null,
"watched_by":[
1820242622
]
},
{
"episode_title":"Episode Title 1",
"episode_number":"01",
"unique_id":"635F12999D0641C95CBBEA21FE1E46BF26BDCCCA",
"series_id":2,
"comments_hash":"7EFD571636",
"quality":1080,
"thumbnail_url":null,
"thumbnail_version":null,
"original_thumbnail":null,
"watched_by":[
1820242622
]
}
]
The data in USERS collection looks something like this:
[
{
"user_id":1820242622,
"full_name":"user 1",
"start_date":"2021-11-18T19:36:27.695953",
"is_banned":false,
"preferred_quality":1080
},
{
"user_id":1820242624,
"full_name":"user 2",
"start_date":"2021-11-18T19:36:27.695953",
"is_banned":false,
"preferred_quality":0
}
]
and I used this code (pymongo):
user_id = 1820242622
series_id = 1
db.data_col.aggregate([
{
"$lookup":{
"from":"USERS",
"pipeline":[
{
"$match":{
"user_id":"user_id"
}
},
{
"$project":{
"preferred_quality":1
}
}
],
"as":"preferred_quality"
}
},
{
"$match":{
"$and":[
{
"$eq":[
"$series_id",
"series_id"
]
},
{
"$in":[
"$quality",
[
0,
"$preferred_quality"
]
]
}
]
}
},
{
"$set":{
"preferred_quality":"$preferred_quality"
}
},
{
"$set":{
"user_watched":{
"$in":[
"user_id",
"$watched_by"
]
}
}
},
{
"$set":{
"watched_by":"$$REMOVE"
}
}
])
but I'm getting:
raise OperationFailure(errmsg, code, response, max_wire_version) pymongo.errors.OperationFailure: unknown top level operator: $eq. If you have a field name that starts with a '$' symbol, consider using $getField or $setField., full error: {'ok': 0.0, 'errmsg': "unknown top level operator: $eq. If you have a field name that starts with a '$' symbol, consider using $getField or $setField.", 'code': 2, 'codeName': 'BadValue', '$clusterTime': {'clusterTime': Timestamp(1648755167, 3), 'signature': {'hash': b"\x81\xd59\xae\x04'\x15\xac0\x08X6\x86\xc8\xa4\xf22\x07\x87\xd2", 'keyId': 7037880200322875408}}, 'operationTime': Timestamp(1648755167, 3)}
I was expecting something like this:
{
"episode_title":"Episode Title 1",
"episode_number":"01",
"unique_id":"635F12999D0641C95CBBEA21FE1E46BF26BDCCCA",
"series_id":1,
"comments_hash":"7EFD571636",
"quality":1080,
"thumbnail_url":null,
"thumbnail_version":null,
"original_thumbnail":null,
"preferred_quality":1080,
"user_watched":false
}
db.data_col.aggregate([
{
$match: {
"unique_id": "635F12999D0641C95CBBEA21FE1E46BF26BDCCCA"
}
},
{
$lookup: {
from: "USERS",
localField: "watched_by",
foreignField: "user_id",
as: "preferred_quality"
}
},
{
$set: {
preferred_quality: {
$first: "$preferred_quality.preferred_quality"
}
}
}
])
mongoplayground

Mongodb Joining multi document with $lookup aggregation operator and search in the joined document using $or at once

Let's say I have a purchase order document that holds the IDs of a supplier and an outlet, both of which are separate documents (not embedded).
What I want is this: when a user sends a query from the client side such as "give me the purchase order documents whose reference number, supplier name, or outlet name is like 'Otilia'", the matching purchase orders should be returned.
The aggregation query I have made so far is this:
[
{
"$lookup":{
"from":"outlets",
"localField":"outlet",
"foreignField":"_id",
"as":"outlets"
}
},
{
"$lookup":{
"from":"suppliers",
"localField":"supplier",
"foreignField":"_id",
"as":"suppliers"
}
},
{
"$match":{
"$and":[
{
"$or":[
{
"outlets.name":{
"$regex":".*Otilia.*",
"$options":"i"
},
"referenceNumber":{
"$regex":".*Otilia.*",
"$options":"i"
},
"suppliers.name":{
"$regex":".*Otilia.*",
"$options":"i"
}
}
]
}
]
}
},
{
"$sort":{
"outlets.name":1,
"_id":1
}
}
]
What is working:
If I remove the other two conditions from the $or array, so that the query looks like this:
[
{
"$lookup":{
"from":"outlets",
"localField":"outlet",
"foreignField":"_id",
"as":"outlets"
}
},
{
"$lookup":{
"from":"suppliers",
"localField":"supplier",
"foreignField":"_id",
"as":"suppliers"
}
},
{
"$match":{
"$and":[
{
"$or":[
{
"suppliers.name":{
"$regex":".*Otilia.*",
"$options":"i"
}
}
]
}
]
}
},
{
"$sort":{
"outlets.name":1,
"_id":1
}
}
]
Note: I used $and because it could be more filters from client-side like purchase order status and more.
Purchase Order document Sample
Supplier document Sample:
outlet document Sample:
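The result I want to achieve is the purchase order documents matched by the query.
Solved by wrapping every $or expression with {}.
I had simply forgotten to wrap each $or condition in its own pair of braces; when all the conditions sit in a single object, they are combined with an implicit AND instead of an OR.
The correct format of the query is this: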
[
{
"$lookup":{
"from":"outlets",
"localField":"outlet",
"foreignField":"_id",
"as":"outlets"
}
},
{
"$lookup":{
"from":"suppliers",
"localField":"supplier",
"foreignField":"_id",
"as":"suppliers"
}
},
{
"$match":{
"$and":[
{
"$or":[
{
"orderNumber":{
"$regex":".*Celia Pouros.*",
"$options":"i"
}
},
{
"supplierInvoice":{
"$regex":".*Celia Pouros.*",
"$options":"i"
}
},
{
"suppliers.name":{
"$regex":".*Celia Pouros.*",
"$options":"i"
}
}
]
}
]
}
},
{
"$sort":{
"outlets.name":1,
"_id":1
}
}
]

return only the last level of the embedded property that is searched in a document

I have these documents.
db.test.find({"house.floor":1})
db.test.insertMany([{
"name":"homer",
"house": {
"floor": 1,
"room":
{
"bed": "bed_pink",
"chair":"chair_pink"
}
}
},
{
"name":"marge",
"house": {
"floor": 1,
"room":
{
"bed": "bed_blue",
"chair":"chair_red"
}
}
}]
)
db.test.find({"house.room.bed":"bed_blue"})
I want to return only the value at the last level of my search. In this case:
{
"bed": "bed_blue",
"chair":"chair_red"
}
The answer I get is the whole document, but I want to return only the sub-document at a certain level of the query. How can I do it?
You can use aggregation for it:
db.test.aggregate([
{ $match: { "house.room.bed":"bed_blue" } },
{ $replaceRoot: { newRoot: "$house.room" } }
])
You can use aggregate together with $match and $project for this as follows:
db.test.aggregate([
{
$match: {
"house.room.bed": "bed_blue"
}
},
{
$project: {
"_id": 0,
"bed": "$house.room.bed",
"chair": "$house.room.chair"
}
}
])
Further, you can modify the $project part as needed.
Here is a demo: https://mongoplayground.net/p/Fvll0LFIvQy

MongoDb how to count fields

I have some data like this:
{
user_id:1,
group_id:123,
discription:null
},
{
user_id:1,
group_id:321,
discription:null
},
{
user_id:1,
group_id:123,
discription:"text"
},
{
user_id:1,
group_id:321,
discription:"another text"
},
{
user_id:1,
group_id:321,
discription:"another another text"
},
etc..
I want to get all groups (grouped by group_id), the count of documents in each group, and the counts of documents in that group whose "discription" is null and whose "discription" is not null.
So I need results like:
[
[group_id:123, count:2, isNull:1, isNotNull:1],
[group_id:321, count:3, isNull:1, isNotNull:2]
]
I know how to group fields by "group_id" and get the "count", but I don't know how to get the info about "description".
db.collection.aggregate([
{
$match:{
user_id:1
}
},
{
$group:{
_id:'$group_id',
group_id:{$first:'$group_id'},
count:{'$sum':1}
}
}
])
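Please check this query. It uses $ifNull to replace a null "discription" with 1, so that $sum (which ignores non-numeric values such as the text descriptions) counts only the null entries; the not-null count is then the group count minus the null count: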
db.testing.aggregate([{
$match:{
user_id:1
}
},
{$project:{_id:0,user_id:1,group_id: 1,description: { $ifNull: [ "$discription", 1 ] }}},
{
$group:{
_id:'$group_id',
group_count:{'$sum':1},
IsNull:{"$sum":"$description"}
}
},
{$project:{_id:0,group_id:"$_id", count:"$group_count", isNull:"$IsNull", isNotNull:{ $subtract:["$group_count","$IsNull"]}}}
])