MongoDB: joining multiple documents with the $lookup aggregation operator and searching in the joined documents using $or at once - mongodb

Let's say I have a purchase order document that holds the IDs of a supplier and an outlet, both of which are separate documents (i.e. not embedded).
What I want is to handle a client-side query like this: "give me the purchase order documents whose purchase-order reference number, supplier name, or outlet name is like 'Otilia'".
The aggregation query I made so far is this:
[
{
"$lookup":{
"from":"outlets",
"localField":"outlet",
"foreignField":"_id",
"as":"outlets"
}
},
{
"$lookup":{
"from":"suppliers",
"localField":"supplier",
"foreignField":"_id",
"as":"suppliers"
}
},
{
"$match":{
"$and":[
{
"$or":[
{
"outlets.name":{
"$regex":".*Otilia.*",
"$options":"i"
},
"referenceNumber":{
"$regex":".*Otilia.*",
"$options":"i"
},
"suppliers.name":{
"$regex":".*Otilia.*",
"$options":"i"
}
}
]
}
]
}
},
{
"$sort":{
"outlets.name":1,
"_id":1
}
}
]
What is working:
If I remove the two other conditions from the $or array, so that the pipeline looks like this:
[
{
"$lookup":{
"from":"outlets",
"localField":"outlet",
"foreignField":"_id",
"as":"outlets"
}
},
{
"$lookup":{
"from":"suppliers",
"localField":"supplier",
"foreignField":"_id",
"as":"suppliers"
}
},
{
"$match":{
"$and":[
{
"$or":[
{
"suppliers.name":{
"$regex":".*Otilia.*",
"$options":"i"
}
}
]
}
]
}
},
{
"$sort":{
"outlets.name":1,
"_id":1
}
}
]
Note: I used $and because there could be more filters coming from the client side, such as purchase order status and others.
Purchase Order document Sample
Supplier document Sample:
outlet document Sample:
The result I want to achieve is the purchase order documents matched by the query.

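Solved by wrapping every $or condition in its own {}.
I had put all three conditions into a single object, which MongoDB treats as an implicit AND; each condition needs its own pair of braces so that it becomes a separate element of the $or array.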
the correct format of the query is this:
[
{
"$lookup":{
"from":"outlets",
"localField":"outlet",
"foreignField":"_id",
"as":"outlets"
}
},
{
"$lookup":{
"from":"suppliers",
"localField":"supplier",
"foreignField":"_id",
"as":"suppliers"
}
},
{
"$match":{
"$and":[
{
"$or":[
{
"orderNumber":{
"$regex":".*Celia Pouros.*",
"$options":"i"
}
},
{
"supplierInvoice":{
"$regex":".*Celia Pouros.*",
"$options":"i"
}
},
{
"suppliers.name":{
"$regex":".*Celia Pouros.*",
"$options":"i"
}
}
]
}
]
}
},
{
"$sort":{
"outlets.name":1,
"_id":1
}
}
]

Related

How to compare fields from different collections in mongodb

Here, I have fields from multiple tables whose values need to be compared in order to display the desired result.
SQL QUERY:
select pd.service_id,ps.service_id from player pd, service ps where pd.subject_id=ps.subject_id and pd.service_id = ps.service_id
Mongo query:
db.player.aggregate([
{
"$lookup":{
"from":"service",
"localField":"player.subject_id",
"foreignField":"subject_id",
"as":"ps"
}
},
{
"$unwind":"$ps"
},
{
"$match":{
"service_id":{
"$eq": "ps.service_id"
}
}
}
];
sample input records:
player:
[{subject_id:23,service_id:1},{subject_id:76,service_id:9}]
service:
[{subject_id:76,service_id:9},{subject_id:99,service_id:10}]
The match is not working. I have to match the service_id values of both collections and get the matched records, but I am not able to see any result. Can anyone please help me find the mistake?
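In your query, if you want to compare two fields of the same document, you need to use the $expr operator; without it, $match compares service_id against the literal string "ps.service_id" instead of the field's value.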
{
"$match":{
"$expr":{
"$eq": ["$service_id", "$ps.service_id"]
}
}
}
MongoPlayground
Alternative solution: use a correlated sub-query (the "let" / "pipeline" form of $lookup) to join on 2 or more conditions
db.player.aggregate([
{
"$lookup": {
"from": "service",
"let": {
subject_id: "$subject_id",
service_id: "$service_id"
},
"pipeline": [
{
$match: {
$expr: {
$and: [
{
$eq: [
"$$subject_id",
"$subject_id"
]
},
{
$eq: [
"$$service_id",
"$service_id"
]
}
]
}
}
}
],
"as": "ps"
}
},
// Remove non matched results
{
$match: {
"ps.0": {
$exists: true
}
}
},
// Remove temporal "ps" field
{
$addFields: {
"ps": "$$REMOVE"
}
}
])
MongoPlayground

Mongodb aggregate - using $and and $eq inside a $match operator

I want to match and fetch documents based on 2 conditions.
data
But it seems like the $eq inside $match isn't doing the right job. I checked the docs many times but can't find what exactly the problem is.
[
{
"episode_title":"Episode Title 1",
"episode_number":"01",
"unique_id":"5D66EEDA9BF055BE80CB997EFD571636C02ED050",
"series_id":1,
"comments_hash":"CBBEA21FE1E4",
"quality":1080,
"thumbnail_url":null,
"thumbnail_version":null,
"original_thumbnail":null,
"watched_by":[
1820242622
]
},
{
"episode_title":"Episode Title 2",
"episode_number":"02",
"unique_id":"12DC0F2B6D1223D8FF6C10189B6CF6DEB4BBA60B",
"series_id":1,
"comments_hash":"EEDA9BF055BE80C",
"quality":720,
"thumbnail_url":null,
"thumbnail_version":null,
"original_thumbnail":null,
"watched_by":[
1820242622
]
},
{
"episode_title":"Episode Title 1",
"episode_number":"01",
"unique_id":"635F12999D0641C95CBBEA21FE1E46BF26BDCCCA",
"series_id":2,
"comments_hash":"7EFD571636",
"quality":1080,
"thumbnail_url":null,
"thumbnail_version":null,
"original_thumbnail":null,
"watched_by":[
1820242622
]
}
]
The data in USERS collection looks something like this:
[
{
"user_id":1820242622,
"full_name":"user 1",
"start_date":"2021-11-18T19:36:27.695953",
"is_banned":false,
"preferred_quality":1080
},
{
"user_id":1820242624,
"full_name":"user 2",
"start_date":"2021-11-18T19:36:27.695953",
"is_banned":false,
"preferred_quality":0
}
]
and I used this code (pymongo):
user_id = 1820242622
series_id = 1
db.data_col.aggregate([
{
"$lookup":{
"from":"USERS",
"pipeline":[
{
"$match":{
"user_id":"user_id"
}
},
{
"$project":{
"preferred_quality":1
}
}
],
"as":"preferred_quality"
}
},
{
"$match":{
"$and":[
{
"$eq":[
"$series_id",
"series_id"
]
},
{
"$in":[
"$quality",
[
0,
"$preferred_quality"
]
]
}
]
}
},
{
"$set":{
"preferred_quality":"$preferred_quality"
}
},
{
"$set":{
"user_watched":{
"$in":[
"user_id",
"$watched_by"
]
}
}
},
{
"$set":{
"watched_by":"$$REMOVE"
}
}
])
but I'm getting:
raise OperationFailure(errmsg, code, response, max_wire_version) pymongo.errors.OperationFailure: unknown top level operator: $eq. If you have a field name that starts with a '$' symbol, consider using $getField or $setField., full error: {'ok': 0.0, 'errmsg': "unknown top level operator: $eq. If you have a field name that starts with a '$' symbol, consider using $getField or $setField.", 'code': 2, 'codeName': 'BadValue', '$clusterTime': {'clusterTime': Timestamp(1648755167, 3), 'signature': {'hash': b"\x81\xd59\xae\x04'\x15\xac0\x08X6\x86\xc8\xa4\xf22\x07\x87\xd2", 'keyId': 7037880200322875408}}, 'operationTime': Timestamp(1648755167, 3)}
I was expecting something like this:
{
"episode_title":"Episode Title 1",
"episode_number":"01",
"unique_id":"635F12999D0641C95CBBEA21FE1E46BF26BDCCCA",
"series_id":1,
"comments_hash":"7EFD571636",
"quality":1080,
"thumbnail_url":null,
"thumbnail_version":null,
"original_thumbnail":null,
"preferred_quality":1080,
"user_watched":false
}
db.data_col.aggregate([
{
$match: {
"unique_id": "635F12999D0641C95CBBEA21FE1E46BF26BDCCCA"
}
},
{
$lookup: {
from: "USERS",
localField: "watched_by",
foreignField: "user_id",
as: "preferred_quality"
}
},
{
$set: {
preferred_quality: {
$first: "$preferred_quality.preferred_quality"
}
}
}
])
mongoplayground

Merge arrays by matching similar values in mongodb

This is an extension of the below question.
Filter arrays in mongodb
I have a collection where each document contains 2 arrays as below.
{
users:[
{
id:1,
name:"A"
},
{
id:2,
name:"B"
},
{
id:3,
name:"C"
}
]
priv_users:[
{
name:"X12/A",
priv:"foobar"
},
{
name:"Y34.B",
priv:"foo"
}
]
}
From the linked question, I learnt to use $map to merge 2 document arrays. But I can't figure out how to match users.name to priv_users.name to get the output below.
{
users:[
{
id:1,
name:"A",
priv:"foobar"
},
{
id:2,
name:"B",
priv:"foo"
},
{
id:3,
name:"C"
}
]
}
users.name and priv_users.name don't have a consistent pattern, but users.name exists within priv_users.name.
MongoDB version is 4.0
This may not be fully generic, but it will push you in the right direction. Consider using the $mergeObjects operator to merge the filtered document from the priv_users array with the document in users.
The filter takes the $substr of the priv_users name field (everything after its first four characters) and compares it with the users name field. The resulting pipeline is as follows:
db.collection.aggregate([
{ '$addFields': {
'users': {
'$map': {
'input': '$users',
'in': {
'$mergeObjects': [
{
'$arrayElemAt': [
{
'$filter': {
'input': '$priv_users',
'as': 'usr',
'cond': {
'$eq': [
'$$this.name',
{ '$substr': [
'$$usr.name', 4, -1
] }
]
}
}
},
0
]
},
'$$this'
]
}
}
}
} }
])
If you are using MongoDB 4.2 or newer, consider using the $regexMatch operator to match the priv_users name field against the users name field as the regex pattern. The "cond" expression of your $filter now becomes:
'cond': {
'$regexMatch': {
'input': '$$usr.name',
'regex': '$$this.name',
'options': "i"
}
}

MongoDb how to count fields

I have some data like this:
{
user_id:1,
group_id:123,
discription:null
},
{
user_id:1,
group_id:321,
discription:null
},
{
user_id:1,
group_id:123,
discription:"text"
},
{
user_id:1,
group_id:321,
discription:"another text"
},
{
user_id:1,
group_id:321,
discription:"another another text"
},
etc..
I want to get all groups (grouped by group_id), the count of documents in each group, and the counts of documents in that group whose "discription" is null and not null.
So i need results like:
[
[group_id:123, count:2, isNull:1, isNotNull:1],
[group_id:321, count:3, isNull:1, isNotNull:2]
]
I know how to group fields by "group_id" and get the "count", but I don't know how to get the info about the "discription" field.
db.collection.aggregate([
{
$match:{
user_id:1
}
},
{
$group:{
_id:'$group_id',
group_id:{$first:'$group_id'},
count:{'$sum':1}
}
}
])
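Please check this query. It uses $ifNull to turn a null "discription" into 1, sums those values per group to get isNull (non-numeric values are ignored by $sum), and subtracts that from the group count to get isNotNull: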
db.testing.aggregate([{
$match:{
user_id:1
}
},
{$project:{_id:0,user_id:1,group_id: 1,description: { $ifNull: [ "$discription", 1 ] }}},
{
$group:{
_id:'$group_id',
group_count:{'$sum':1},
IsNull:{"$sum":"$description"}
}
},
{$project:{_id:0,group_id:"$_id", count:"$group_count", isNull:"$IsNull", isNotNull:{ $subtract:["$group_count","$IsNull"]}}}
])

MongoDB count in array very slow

My data is as follow:
{
"timestamp":1467349392 ,
"alert_tag":[
"alert_source: WS2",
"alert_severity:warning",
"alert_object:Sensor-840",
"alert_type_id:2",
"alert_status:resovled"
],
"alert_info":{
"t":1
},
"server_created_timestamp":NumberLong("1467349309392"),
"alert_id":"20"
}
I have about two million records in the collection. I am trying to count the number of unique alert_status, alert_object, alert_source that matches the query. The following is my aggregation pipeline (match, unwind, group, sort, project, group) and it takes about 40s to complete:
{
"$match":{
"alert_tag":{
"$in":[
"alert_status:pending"
]
}
}
},
{
"$unwind":"$alert_tag"
},
{
"$group":{
"_id":"$alert_tag",
"count":{
"$sum":1
}
}
},
{
"$sort":{
"_id":1
}
},
{
"$project":{
"tmp":{
"alert":"$_id",
"count":"$count"
}
}
},
{
"$group":{
"_id":null,
"total":{
"$sum":1
},
"data":{
"$addToSet":"$tmp"
}
}
}
How can I improve the performance? alert_tag has been indexed