Performing Repeated filtering in mongodb - mongodb

I want to get the names of people who work in bmw and use these names as filter to find and return the documnents containing the field "car". So the end result must be the last two documents given in this example.
[
{"_id": "235", "name": "indu", "dob": "31/4/15", "company": "bmw"},
{"_id": "236", "name": "prith", "dob": "01/4/98", "company": "bmw"},
{"_id": "237", "name": "rames", "dob": "07/4/00", "company": "renault"},
{"_id": "238", "name": "indu", "salary": "10,000", "car": "yes", "married": "yes"},
{"_id": "239", "name": "prith", "salary": "80,000", "car": "yes", "children": "no"}
]
I appreciate your help, Thanks in advance

You want to use $exists
names = db.collection.distinct{"name", {"company": "bmw"})
db.collection.find({"car": {"$exists": true}, "name": {"$in": names}})
You can also do it in 1 aggregation call although I would not recommend it as It's less efficient.
db.collection.aggregate([
{
"$match": {
"company": "bmw"
}
},
{
$lookup: {
from: "this_collection",
localField: "name",
foreignField: "name",
as: "roots"
}
},
{
"$unwind": "$roots"
},
{
"$replaceRoot": {
"newRoot": "$roots"
}
},
{
"$match": {
"car": {"$exists": true}
}
}
])

Related

Join multiple collections in MongoDB

Greetings amigo i have one question related joining multiple collection in MongoDb
i have collection schema something like below
Posts Collection
{
"type": "POST_TYPE",
"_id": "63241dffb0f6770c23663230",
"user_id": "63241dffb0f6770c23663230",
"post_id": "63241dffb0f6770c23663230",
"likes": 50
}
Post Types: 1. Event
{
"date": "2022-09-16T07:07:18.242+00:00",
"_id": "63241dffb0f6770c23663230",
"user_id": "63241dffb0f6770c23663230",
"venue": "Some Place",
"lat": "null",
"long": "null",
}
Post Types: 2. Poll
{
"created_date": "2022-09-16T07:07:18.242+00:00",
"_id": "63241dffb0f6770c23663230",
"user_id": "63241dffb0f6770c23663230",
"question": "Question??????",
"poll_opt1": "Yes",
"poll_opt2": "No",
"poll_opt1_count": "5",
"poll_opt2_count": "2"
}
now i have to join Post collection with respective collection e.g.
"post_id" to Event::_id or Poll::_id with condition to Post::type
i have tried aggregation but it does not gave expected output.
i am trying to get output something like below
[
{
"type": "event",
"_id": "63241dffb0f6770c23663230",
"user_id": "63241dffb0f6770c23663230",
"post_id": {
"date": "2022-09-16T07:07:18.242+00:00",
"_id": "63241dffb0f6770c23663230",
"user_id": "63241dffb0f6770c23663230",
"venue": "Some Place",
"lat": "null",
"long": "null"
},
"likes": 50
},
{
"type": "poll",
"_id": "63241dffb0f6770c23663230",
"user_id": "63241dffb0f6770c23663230",
"post_id": {
"created_date": "2022-09-16T07:07:18.242+00:00",
"_id": "63241dffb0f6770c23663230",
"user_id": "63241dffb0f6770c23663230",
"question": "Question??????",
"poll_opt1": "Yes",
"poll_opt2": "No",
"poll_opt1_count": "5",
"poll_opt2_count": "2"
},
"likes": 50
}
]
is there any efficient way to achieve this or better MongoDb schema to manage these types of records?
You can try something like this, using $facet:
db.posts.aggregate([
{
"$facet": {
"eventPosts": [
{
"$match": {
type: "event"
},
},
{
"$lookup": {
"from": "events",
"localField": "post_id",
"foreignField": "_id",
"as": "post_id"
}
}
],
"pollPosts": [
{
"$match": {
type: "poll"
},
},
{
"$lookup": {
"from": "poll",
"localField": "post_id",
"foreignField": "_id",
"as": "post_id"
}
}
]
}
},
{
"$addFields": {
"doc": {
"$concatArrays": [
"$pollPosts",
"$eventPosts"
]
}
}
},
{
"$unwind": "$doc"
},
{
"$replaceRoot": {
"newRoot": "$doc"
}
},
{
"$addFields": {
"post_id": {
"$cond": {
"if": {
"$eq": [
{
"$size": "$post_id"
},
0
]
},
"then": {},
"else": {
"$arrayElemAt": [
"$post_id",
0
]
}
}
}
}
}
])
We do the following, in the query:
Perform two $lookups for the different post_type within $facet. This unfortunately will increase, with the different values of post_type.
Then we combine all the arrays obtained from $facet, using $concatArray.
Then we unwind the concatenated array, and bring the nested document to the root using $replaceRoot.
Finally, for post_id we pick the first array element if it exists, to match the desired output.
Playground link.

Compare duplicates by Score By Not Max Field or Earlier Date

I need to compare duplicated documents and get the duplicated ones with the Lowest Score.
If the Score between two duplicates is Equal, then get the one with earlier date.
{
"_id": UUID("c77c72de-edd8-4576-a72c-983cf93a0f31"),
"DocumentId": "05240423067",
"Name": "John Doe",
"CreationDate": ISODate("0001-01-01T00:00:00.000+00:00"),
"Score": 5,
},
{
"_id": UUID("b5a7d404-a341-45dd-b875-864cd1e6bda2"),
"DocumentId": "05240423067",
"Name": "John Doe",
"CreationDate": ISODate("2021-07-17T00:00:00.000+00:00"),
"Score": 2
},
{
"_id": UUID("9efddd23-4b6b-4e96-ab43-b24a080107db"),
"DocumentId": "05240423067",
"Name": "John Doe",
"CreationDate": ISODate("2021-07-10T00:00:00.000+00:00"),
"Score": 2
},
{
"_id": UUID("f1a063a5-f9dd-4998-b6aa-df2071dd8677"),
"DocumentId": "88313825863",
"Name": "Marcus Joseph",
"CreationDate": ISODate("2021-07-17T00:00:00.000+00:00"),
"Score": 2
},
{
"_id": UUID("e3262f8e-bd6a-49e8-abe5-c3c1a4e49900"),
"DocumentId": "88313825863",
"Name": "Marcus Joseph",
"CreationDate": ISODate("0001-01-01T00:00:00.000+00:00"),
"Score": 1
}
Later, the resulting documents will be deleted.
Expected Result:
{
"_id": UUID("b5a7d404-a341-45dd-b875-864cd1e6bda2"),
"DocumentId": "05240423067",
"Name": "John Doe",
"CreationDate": ISODate("2021-07-17T00:00:00.000+00:00"),
"Score": 2 // Return Documents with the **Lowest Score**
},
{
"_id": UUID("9efddd23-4b6b-4e96-ab43-b24a080107db"),
"DocumentId": "05240423067",
"Name": "John Doe",
"CreationDate": ISODate("2021-07-10T00:00:00.000+00:00"),
"Score": 2 // Return Documents with the **Lowest Score**
},
{
"_id": UUID("e3262f8e-bd6a-49e8-abe5-c3c1a4e49900"),
"DocumentId": "88313825863",
"Name": "Marcus Joseph",
"CreationDate": ISODate("0001-01-01T00:00:00.000+00:00"),
"Score": 2 // If both Scores Equal, Compare CreationDate earlier
}
Mongo Version 4.2.21
This would be easier with some of the newer "$group" accumulators introduced in more recent versions of MongoDB, but here's one way you could do it.
db.collection.aggregate([
{
"$group": {
"_id": "$DocumentId",
"count": {"$sum": 1},
"docs": {"$push": "$$ROOT"}
}
},
{ // if only 1, keep it
"$match": {
"$expr": {"$gt": ["$count", 1]}
}
},
{ // find the doc to keep
"$set": {
"keepDoc": {
"$reduce": {
"input": "$docs",
"initialValue": {
"Score": {"$minKey": 1}
},
"in": {
"$switch": {
"branches": [
{
"case": {"$gt": ["$$this.Score", "$$value.Score"]},
"then": "$$this"
},
{
"case": {"$eq": ["$$this.Score", "$$value.Score"]},
"then": {
"$cond": [
{"$gt": ["$$this.CreationDate", "$$value.CreationDate"]},
"$$this",
"$$value"
]
}
}
],
"default": "$$value"
}
}
}
}
}
},
{ // get docs other than keepDoc
"$project": {
"_id": 0,
"expiredDocs": {
"$filter": {
"input": "$docs",
"cond": {"$ne": ["$$this", "$keepDoc"]}
}
}
}
},
{"$unwind": "$expiredDocs"},
{"$replaceWith": "$expiredDocs"}
])
Try it on mongoplayground.net.
N.B.: On mongoplayground.net, there's no easy way that I know of to enter binary UUID values in the BSON configuration, so I just used strings. It should be inconsequential to the pipeline.

MongoDB query: aggregate with "findOne"

I'm trying to make a query on MongoDB 3.4 where I add a field for one specific element of an array. Example of the object:
{
"_id": 1,
"people": [
{
"name": "Jhon Smith",
"age": "30",
"state": "NY"
},{
"name": "Clint Mercer",
"age": "50",
"state": "NY"
},{
"name": "Walter Smith",
"age": "40",
"state": "WI"
}
]
}
And I want to make a query where I'll add to this document an attribute with the first person with "Smith" in it's name. Example:
{
"_id": 1,
"people": [
{
"name": "Jhon Smith",
"age": "30",
"state": "NY"
},{
"name": "Clint Mercer",
"age": "50",
"state": "NY"
},{
"name": "Walter Smith",
"age": "40",
"state": "WI"
}
],
"firstSmith": {
"name": "Jhon Smith",
"age": "30",
"state": "NY"
}
}
I already have the _id of the document I want, but I can't understand how to make a query like this. I'm trying using aggregate with "$match" for the id and "$addFields" after, but I can't make a query that works for this field to find exactly what I want. I think it would be similar to the "findOne" query, but I can't find anything that works on "$addFields".
Obs: I DON'T want the "firstSmith" to be an array with just one "people" inside, I want it as is in the example.
I'd appreciate some help with this one.
$match - to filter the relevant document
$filter with $regexMatch - to filter people array by the name property
arrayElemAt - to get only the first element of above array
$addFields - to add new field with value from above result
db.collection.aggregate([
{
"$match": {
"_id": 1
}
},
{
"$addFields": {
"firstSmith": {
"$arrayElemAt": [
{
"$filter": {
"input": "$people",
"cond": {
"$regexMatch": {
"input": "$$this.name",
"regex": "Smith"
}
}
}
},
0
]
}
}
}
])
Working example

MongoDB aggregation lookup objects in specific date range

I have some users and orders made by them:
db={
orders: [
{
"_id": "wJNEiSYwBd5ozGtLX",
"orderId": 52713,
"retailerId": 1320,
"createdAt": ISODate("2020-01-31T04:34:13.790Z"),
"status": "closed"
},
{
"_id": "wJNEiSYwBd5ozGtLX2",
"orderId": 52714,
"retailerId": 1320,
"createdAt": ISODate("2021-03-31T04:34:13.790Z"),
"status": "closed"
}
],
users: [
{
"_id": "2gSznevqwkGTxLRvL",
"createdAt": ISODate("2018-04-10T08:33:13.455Z"),
"username": "retailer#gmail.com",
"info": {
"retailerId": 1320,
},
"settings": {},
"status": "active",
}
]
}
If I try to aggregate orders into users:
db.users.aggregate([
{
"$lookup": {
"from": "orders",
"localField": "info.retailerId",
"foreignField": "retailerId",
"as": "orders"
}
},
])
I can get all orders merged into users like this:
[
{
"_id": "2gSznevqwkGTxLRvL",
"createdAt": ISODate("2018-04-10T08:33:13.455Z"),
"info": {
"retailerId": 1320
},
"orders": [
{
"_id": "wJNEiSYwBd5ozGtLX",
"createdAt": ISODate("2020-01-31T04:34:13.79Z"),
"orderId": 52713,
"retailerId": 1320,
"status": "closed"
},
{
"_id": "wJNEiSYwBd5ozGtLX2",
"createdAt": ISODate("2021-03-31T04:34:13.79Z"),
"orderId": 52714,
"retailerId": 1320,
"status": "closed"
}
],
"settings": {},
"status": "active",
"username": "retailer#gmail.com"
}
]
But I want to only merge orders from last month, not all orders into users.
How can I specify the date range?
https://mongoplayground.net/p/mZlDHQf2thN
Demo - https://mongoplayground.net/p/-hjGipqQPJq
https://docs.mongodb.com/manual/reference/operator/aggregation/lookup/#join-conditions-and-uncorrelated-sub-queries
To perform uncorrelated subqueries between two collections as well as allow other join conditions besides a single equality match, the $lookup stage has the following syntax:
Use $lookup in the pipeline style -
db.users.aggregate([
{
"$lookup": {
"from": "orders",
"let": { "rId": "$info.retailerId" },
"pipeline": [
{
$match: {
$expr: { $eq: [ "$retailerId","$$rId" ] },
createdAt: { $gte: ISODate("2021-03-01T00:00:00.000Z"), $lt: ISODate("2021-04-01T00:00:00.000Z") } // write date range query here
}
}
],
"as": "orders"
}
}
])

Mongo Aggregate Combine Two Documents

Once I've unwound a sub-document array, how do I put it back together with all the original root fields?
Consider the following Tasks data set:
[
{
"_id": "5e95bb1cf36c0ab3247036bd",
"name": "Task A",
"org": "5e95b9894a0aa0b30dfcbc0b",
"creator": "5e117e5cd90de7187b000d87"
},
{
"_id": "5e95bb30f36c0ab3247036be",
"name": "Task B1",
"org": "5e95b9894a0aa0b30dfcbc0b",
"creator": "5e117e5cd90de7187b000d87",
"parent": "5e95bb1cf36c0ab3247036bd"
},
{
"_id": "5e95bb35f36c0ab3247036bf",
"name": "Task B2",
"org": "5e95b9894a0aa0b30dfcbc0b",
"creator": "5e117e5cd90de7187b000d87",
"parent": "5e95bb1cf36c0ab3247036bd"
}
]
So, then I run $graphLookup to get the parent task and populate it's children and then $unwind it and populate the creator field:
[
{
"$match": {
"parent": {
"$exists": false
}
}
},
{
"$graphLookup": {
"from": "tasks",
"startWith": "$_id",
"connectFromField": "_id",
"connectToField": "parent",
"as": "children"
}
},
{
"$unwind": {
"path": "$children"
}
},
{
"$lookup": {
"from": "users",
"localField": "children.creator",
"foreignField": "_id",
"as": "children.creator"
}
},
{
"$unwind": {
"path": "$children.creator"
}
}
]
Which returns the following documents:
[
{
"_id": "5e95bb1cf36c0ab3247036bd",
"name": "Task A",
"org": "5e95b9894a0aa0b30dfcbc0b",
"creator": "5e117e5cd90de7187b000d87",
"children": [
{
"_id": "5e95bb30f36c0ab3247036be",
"name": "Task B1",
"org": "5e95b9894a0aa0b30dfcbc0b",
"creator": {
"name": "Jack Frost"
},
"parent": "5e95bb1cf36c0ab3247036bd"
}
]
},
{
"_id": "5e95bb1cf36c0ab3247036bd",
"name": "Task A",
"org": "5e95b9894a0aa0b30dfcbc0b",
"creator": "5e117e5cd90de7187b000d87",
"children": [
{
"_id": "5e95bb35f36c0ab3247036bf",
"name": "Task B2",
"org": "5e95b9894a0aa0b30dfcbc0b",
"creator": {
"name": "Bill Nye"
},
"parent": "5e95bb1cf36c0ab3247036bd"
}
]
},
]
Lastly, I need to merge all of these duplicate documents back together and join the $children. This is the part I can't figure out. Below is some junk I'm trying but it seems messy to have to specifically list every property.
Is there a better way to combine multiple (mostly) matching docs?
[
...
{
"$group": {
"_id": "$_id",
"name": {
"$mergeObjects": "$properties"
},
"watchers": {
"$addToSet": "$watchers"
},
"assignees": {
"$addToSet": "$assignees"
},
"org": {
"$addToSet": "$$ROOT.org"
},
"children": {
"$push": "$children"
}
}
}
]
Answering my own question here, the best solution I can find is to specify each property but pass it the $first operator. This will ensure that the original value will be passed through.
{
$group: {
_id: '$_id',
name: {$first: '$name'},
org: {$first: '$org'},
creator: {$first: '$creator'},
children: {$push: '$children'}
}
}