MongoDB query with conditional group by statement - mongodb

I need to export customer records from database of mongoDB. Exported customer records should not have duplicated values. "firstName+lastName+code" is the key to DE-duped the record and If there are two records present in database with same key then I need to give preference to source field with value other than email.
customer (id,firstName,lastName,code,source) collection is this.
If there are record 3 records with same unique key and 3 different sources then i need to choose only one record between 2 sources(TV,internet){or if there are n number of sources i need the one record only}not with the 'email'(as email will be choosen when only one record is present with the unique key and source is email)
query using:
db.customer.aggregate([
{
"$match": {
"active": true,
"dealerCode": { "$in": ["111391"] },
"source": { "$in": ["email", "TV", "internet"] }
}
},
{
$group: {
"_id": {
"firstName": "$personalInfo.firstName",
"lastName": "$personalInfo.lastName",
"code": "$vehicle.code"
},
"source": {
$addToSet: { "source": "$source" }
}
}
},
{
$redact:
{
$cond: [
{ $eq: [{ $ifNull: ["$source", "other"] }, "email"] },
"$$PRUNE",
"$$DESCEND"
]
}
},
{
$project:
{
"source":
{
$map:
{
"input": {
$cond: [
{ $eq: [{ $size: "$source" }, 0] },
[{ "source": "email" }],
"$source"
]
},
"as": "inp",
"in": "$$inp.source"
}
},
"record": { "_id": 1 }
}
}
])
sample output:
{ "_id" : { "firstName" : "sGI6YaJ36WRfI4xuJQzI7A==", "lastName" : "99eQ7i+uTOqO8X+IPW+NOA==", "code" : "1GTHK23688F113955" }, "source" : ["internet"] }
{ "_id" : { "firstName" : "WYDROTF/9vs9O7XhdIKd5Q==", "lastName" : "BM18Uq/ltcbdx0UJOXh7Sw==", "code" : "1G4GE5GV5AF180133" }, "source" : ["internet"] }
{ "_id" : { "firstName" : "id+U2gYNHQaNQRWXpe34MA==", "lastName" : "AIs1G33QnH9RB0nupJEvjw==", "code" : "1G4GE5EV0AF177966" }, "source" : ["internet"] }
{ "_id" : { "firstName" : "qhreJVuUA5l8lnBPVhMAdw==", "lastName" : "petb0Qx3YPfebSioY0wL9w==", "code" : "1G1AL55F277253143" }, "source" : ["TV"] }
{ "_id" : { "firstName" : "qhreJVuUA5l8lnBPVhMAdw==", "lastName" : "6LB/NmhbfqTagbOnHFGoog==", "code" : "1GCVKREC0EZ168134" }, "source" : ["TV", "internet"] }
This is a problem with this query please suggest :(

Your code doesn't work, because $cond is not an accumulator operator. Only these accumulator operators, can be used in a $group stage.
Assuming your records contain not more than two possible values of source as you mention in your question, you could add a conditional $project stage and modify the $group stage as,
Code:
db.customer.aggregate([
{
$group: {
"_id": {
"id": "$id",
"firstName": "$firstName",
"lastName": "$lastName",
"code": "$code"
},
"sourceA": { $first: "$source" },
"sourceB": { $last: "$source" }
}
},
{
$project: {
"source": {
$cond: [
{ $eq: ["$sourceA", "email"] },
"$sourceB",
"$sourceA"
]
}
}
}
])
In case there can be more that two possible values for source, then you could do the following:
Group by the id, firstName, lastName and code. Accumulate
the unique values of source, using the $addToSet operator.
Use $redact to keep only the values other than email.
Project the required fields, if the source array is empty(all the elements have been removed), add a
value email to it.
Unwind the source field to list it as a field and not an array.
(optional)
Code:
db.customer.aggregate([
{
$group: {
"_id": {
"id": "$id",
"firstName": "$firstName",
"lastName": "$lastName",
"code": "$code"
},
"sourceArr": { $addToSet: { "source": "$source" } }
}
},
{
$redact: {
$cond: [
{ $eq: [{ $ifNull: ["$source", "other"] }, "email"] },
"$$PRUNE",
"$$DESCEND"
]
}
},
{
$project: {
"source": {
$map: {
"input":
{
$cond: [
{ $eq: [{ $size: "$sourceArr" }, 0] },
[{ "source": "item" }],
"$sourceArr"]
},
"as": "inp",
"in": "$$inp.source"
}
}
}
}
])

Related

MongoDB $lookup on array of objects

Categories
{
"_id" : ObjectId("61740086893f048528d166b9"),
"name": "Category1",
"tracks" : [
"61c65353565a2d9a1cd3020d",
"61c74518962dc3efb96c3438",
"61c74775703176a6f72df444"
]
}
Tracks
{
"_id" : ObjectId("61c65353565a2d9a1cd3020d"),
"name" : "Track1",
"categoryId" : ObjectId("61740086893f048528d166b9"),
"creatorId" : ObjectId("61c6478304e98ed63e8ee7d3"),
"thumbnailId" : ObjectId("61c65353565a2d9a1cd3020c"),
"plays" : [],
"media" : {
"type" : "wav",
"url" : ""
},
"status" : "approved",
"downloads" : [],
"uploadedDate" : 1640387411
}
Assuming that I have 5 categories and each category has many tracks ID, I wanna get N last tracks for each category so I used this code below
categories.aggregate([
{
$project: {
tracks: { $slice: ["$tracks", -2] },
},
},
]
And the response is
[
{
"_id": "61740086893f048528d166b9",
"tracks": [
"61c74518962dc3efb96c3438",
"61c74775703176a6f72df444"
]
},
{
"_id": "61740094893f048528d166c1",
"tracks": []
},
{
"_id": "617400a0893f048528d166cb",
"tracks": []
}
]
So far it's good, but the question is how can I replace each category's tracks from an array of IDs to an array of objects?
I tried $loopup but I probably didn't implement the localField correctly.
Expected result
[
{
"_id": "61740086893f048528d166b9",
"tracks": [
{
"_id": ObjectId("61c74518962dc3efb96c3438")
...
},
{
"_id": ObjectId("61c74775703176a6f72df444")
...
}
]
},
{
"_id": "61740094893f048528d166c1",
"tracks": []
},
{
"_id": "617400a0893f048528d166cb",
"tracks": []
}
]
***** UPDATE *****
I'm trying to replace the creatorId by createdBy which is an object of the users from the users collection
Users
{
"_id": ObjectId("61c6478304e98ed63e8ee7cb"),
"email": "USER888#gmail.com",
"username": "USER999",
"tracks": [
ObjectId("61c65353565a2d9a1cd3020d"),
],
}
The expected result should be
[
{
"_id": "61740086893f048528d166b9",
"tracks": [
{
"_id": ObjectId("61c74518962dc3efb96c3438"),
"createdBy": {
"_id": "userId"
...
},
...
},
{
"_id": ObjectId("61c74775703176a6f72df444"),
"createdBy": {
"_id": "userId"
...
}
...
}
]
},
{
"_id": "61740094893f048528d166c1",
"tracks": []
},
{
"_id": "617400a0893f048528d166cb",
"tracks": []
}
]
In addition to the solution below by ray, I added the code here https://mongoplayground.net/p/8AjmnL-vhtz
The createdBy is at the top level but not under every track
$lookup is the correct way for you to find the corresponding object in Tracks collection. Why your code does not work is that you are storing strings in tracks array in Categories collection; while the _id of Tracks collection is ObjectId. There will be no $lookup result as the datatypes do not match. What you can do is converting the strings to ObjectId by using $toObjectId in a $map, and then do the $lookup
db.categories.aggregate([
{
$project: {
tracks: {
$slice: [
"$tracks",
-2
]
}
}
},
{
$project: {
tracks: {
"$map": {
"input": "$tracks",
"as": "t",
"in": {
"$toObjectId": "$$t"
}
}
}
}
},
{
"$lookup": {
"from": "tracks",
let: {
t: "$tracks"
},
pipeline: [
{
$match: {
$expr: {
"$in": [
"$_id",
"$$t"
]
}
}
}
],
"as": "tracks"
}
}
])
Here is the Mongo playground for your reference.

Aggregation at each document level mongodb

I have a list of documents like this
[{
"_id": "5dbc95f921d7625303fe2369",
"name": "John",
"itemsPurchased": [{
"offer": "o1",
"items": ["p1"]
},{
"offer": "o1",
"items": ["p1"]
},
{
"offer": "o1",
"items": ["p2"]
},
{
"offer": "o2",
"items": ["p1"]
}, {
"offer": "o7",
"items": ["p1"]
}
]
},
{
"_id": "zbc95f921d7625303fe2363",
"name": "Doe",
"itemsPurchased": [{
"offer": "o1",
"items": ["p11"]
},{
"offer": "o1",
"items": ["p11"]
},
{
"offer": "o2",
"items": ["p13"]
},
{
"offer": "o1",
"items": ["p22"]
},
{
"offer": "o2",
"items": ["p11"]
}, {
"offer": "o3",
"items": ["p11"]
}
]
}
]
And i am trying to compute unique offers on unique products by each customer, expecting the resultant to be like:
[
{
"_id": "5dbc95f921d7625303fe2369",
"name": "John",
"offersAndProducts": {
"o1":2,
"o2":2,
"o3":1
},
{
"_id": "zbc95f921d7625303fe2363",
"name": "Doe",
"offersAndProducts": {
"o1":2,
"o2":1,
"o7":1
}
]
I want to apply aggregations per document, After performing $unwind on itemsPurchased, applied $group on items and then on offer to eliminate the duplication:
{
"$group" : {
"_id" : {
"item" : {
"$arrayElemAt" : [
"$itemsPurchased.item",
0.0
]
},
"count" : {
"$sum" : 1.0
},
"offer" : "$itemsPurchased.offer"
}
}
}
then,
{
"$group" : {
"_id" : "$_id.offer",
"count" : {
"$sum" : 1.0
}
}
}
this gives the array of products and offers for all documents:
[
{o1:4,o2:3,o3:1,o7:1}
]
But i need it at document level.
tried $addFeild, but $unwind and $match operators gives invalid error.
Any other way of achieving this?
Generally speaking, it's an anti-pattern to $unwind an array and then to $group on the original _id since most operations can be done on the array directly, in a single stage. Here is what such a stage would look like:
{$addFields:{
offers:{$arrayToObject:{
$map:{
input:{$setUnion:"$itemsPurchased.offer"},
as:"o",
in:[
"$$o",
{$size:{$setUnion:{$let:{
vars:{items:{$filter:{
input:"$itemsPurchased",
cond:{$eq:["$$this.offer","$$o"]}
}}},
in:{$reduce:{
input:"$$items",
initialValue:[],
in:{$concatArrays:["$$value","$$items.items"]}
}}
}}}
}]
}
}}
}}
What this does is create an array where each element is a two element array (which is a syntax that $arrayToObject can convert to an object where first element is key name and second is value) and the input is a unique set of offers and for each we accumulate an array of products, get rid of duplicates (with $setUnion) and then get the size of the result. What this produces on your input is this:
"offers" : {
"o1" : 2,
"o2" : 2,
"o3" : 1
}
You need to run $unwind and $group twice. To count only unique items you can use $addToSet. To build your keys dynamically you need to use $arrayToObject:
db.collection.aggregate([
{
$unwind: "$itemsPurchased"
},
{
$unwind: "$itemsPurchased.items"
},
{
$group: {
_id: {
_id: "$_id",
offer: "$itemsPurchased.offer"
},
name: { $first: "$name" },
items: { $addToSet: "$itemsPurchased.items" }
}
},
{
$group: {
_id: "$_id._id",
name: { $first: "$name" },
offersAndProducts: { $push: { k: "$_id.offer", v: { $size: "$items" } } }
}
},
{
$project: {
_id: 1,
name: 1,
offersAndProducts: { $arrayToObject: "$offersAndProducts" }
}
}
])
Mongo Playground

MongoDB: How to make two unions to the same collection with matrices

I have user records with posts and posts shared with them, that is, users can share the posts with other users. I need to be able to bring or get only the posts shared with him, using as a reference the id of the user who shared the post and the id of the post.
when I use the user id as a reference, it works, but when I try to combine it with the id of the post it does not get anything, this happens when I try to use only the id of the post to get the shared posts.
This would be the structure of the records
{
"_id" : ObjectId("5cd573b2bb9ad84f9bba2f74"),
"name" : "name 4",
"posts" : [
{
"_id" : ObjectId("5cd573b2bb9ad84f9bba2f72"),
"name" : "post 1"
},
{
"_id" : ObjectId("5cd573b2bb9ad84f9bba2f73"),
"name" : "post 2"
}
],
"postSharedWithMe" : [
{
"user_id" : "5cd4aaedfcf8d8583cf97494",
"post_id" : "5cd4aaedfcf8d8583cf97492"
},
{
"user_id" : "5cd4aaedfcf8d8583cf97494",
"post_id" : "5cd4aaedfcf8d8583cf97493"
}
]
}
and in this way he tried to consult them
db.users.aggregate([
{ "$match": { "_id": ObjectId("5cd573b2bb9ad84f9bba2f74") }},
{ $unwind:"$postSharedWithMe" },
{ $unwind:"$posts" },
{
$lookup:
{
from: "users",
let: {
user_id: { "$toObjectId": "$postSharedWithMe.user_id"},
post_id : { "$toObjectId": "$postSharedWithMe.post_id"}
},
pipeline: [
{ $match:
{ $expr:
{ $and:
[
{ $eq: [ "$_id", "$$user_id" ] },
{ $eq: [ "$posts._id", "$$post_id" ] }
]
}
}
},
],
as: "sharedPosts"
}
},
{ $unwind:"$sharedPosts" },
{ "$group": {
"_id": "$_id",
"sharedPosts": { "$push": "$sharedPosts" }
}
}
])
and this is the result:
Fetched 0 record(s) in 0ms
and this is what I expected
{
"_id" : ObjectId("5cd573b2bb9ad84f9bba2f74"),
"name" : "username",
"posts" : [
{
"_id" : ObjectId("5cd573b2bb9ad84f9bba2f72"),
"name" : "post 1"
},
{
"_id" : ObjectId("5cd573b2bb9ad84f9bba2f73"),
"name" : "post 2"
}
],
"sharedPosts" : [
{
"_id" : ObjectId("id"),
"name" : "shared post"
},
{
"_id" : ObjectId("id"),
"name" : "shared post"
}
]
}
apparently I needed to go through all the posts first while referring to the shared posts, the result of this was an array, now I just needed to make the $ unwind and compare with $ eq and it worked!
db.users.aggregate([
{ $match: { "_id": ObjectId("5cd573b2bb9ad84f9bba2f74") }},
{ $unwind: "$postSharedWithMe" },
{
$lookup:
{
from: "users",
let: {
user_id: { $toObjectId: "$postSharedWithMe.user_id"},
post_id : { $toObjectId: "$postSharedWithMe.post_id"}
},
pipeline: [
{ $match:
{ $expr:
{ $and:
[
{ $eq: [ "$$user_id", "$_id" ] },
{ $in: ["$$post_id", "$posts._id" ] },
]
}
}
},
{ $unwind: "$posts" },
{ $match: { $expr: { $eq: [ "$posts._id", "$$post_id" ] } } },
],
as: "sharedPosts"
}
},
{ $unwind: "$sharedPosts" },
{ $group: {
_id: "$_id",
name: { "$first": "$name" },
posts: { "$first": "$posts" },
sharedPosts: { $push:
"$sharedPosts.posts"
}
}
}
])

Combine results based on condition during group by

Mongo query generated out of java code:
{
"pipeline": [{
"$match": {
"Id": "09cd9a5a-85c5-4948-808b-20a52d92381a"
}
},
{
"$group": {
"_id": "$result",
"id": {
"$first": "$result"
},
"labelKey": {
"$first": {
"$ifNull": ["$result",
"$result"]
}
},
"value": {
"$sum": 1
}
}
}]
}
Field 'result' can have values like Approved, Rejected, null and "" (empty string). What I am trying to achieve is combining the count of both null and empty together.
So that the empty string Id will have the count of both null and "", which is equal to 4
I'm sure theres a more "proper" way but this is what i could quickly come up with:
[
{
"$group" : {
"_id" : "$result",
"id" : {
"$first" : "$result"
},
"labelKey" : {
"$first" : {
"$ifNull" : [
"$result",
"$result"
]
}
},
"value" : {
"$sum" : 1.0
}
}
},
{
"$group" : {
"_id" : {
"$cond" : [{
$or: [
{"$eq": ["$_id", "Approved"]},
{"$eq": ["$_id", "Rejected"]},
]}},
"$_id",
""
]
},
"temp" : {
"$push" : {
"_id" : "$_id",
"labelKey" : "$labelKey"
}
},
"count" : {
"$sum" : "$value"
}
}
},
{
"$unwind" : "$temp"
},
{
"$project" : {
"_id" : "$temp._id",
"labelKey": "$temp.labelKey",
"count" : "$count"
}
}
],
);
Due to the fact the second group is only on 4 documents tops i don't feel too bad about doing this.
I have used $facet.
The MongoDB stage $facet lets you run several independent pipelines within the stage of a pipeline, all using the same data. This means that you can run several aggregations with the same preliminary stages, and successive stages.
var queries = [{
"$match": {
"Id": "09cd9a5a-85c5-4948-808b-20a52d92381a"
}
},{
$facet: {//
"empty": [
{
$match : {
result : { $in : ['',null]}
}
},{
"$group" : {
"_id" : null,
value : { $sum : 1}
}
}
],
"non_empty": [
{
$match : {
result : { $nin : ['',null]}
}
},{
"$group" : {
"_id" : '$result',
value : { $sum : 1}
}
}
]
}
},
{
$project: {
results: {
$concatArrays: [ "$empty", "$non_empty" ]
}
}
}];
Output :
{
"results": [{
"_id": null,
"value": 52 // count of both '' and null.
}, {
"_id": "Approved",
"value": 83
}, {
"_id": "Rejected",
"value": 3661
}]
}
Changing the group by like below solved the problem
{
"$group": {
"_id": {
"$ifNull": ["$result", ""]
},
"id": {
"$first": "$result"
},
"labelKey": {
"$first": {
"$ifNull": ["$result",
"$result"]
}
},
"value": {
"$sum": 1
}
}
}

$elemMatch against two Array elements if one fails

A bit odd but this is what I am looking for.
I have an array as follow:
Document 1:
Items: [
{
"ZipCode": "11111",
"ZipCode4" "1234"
}
Document 2:
Items: [
{
"ZipCode": "11111",
"ZipCode4" "0000"
}
I would like to use a single query, and send a filter on ZipCode = 1111 && ZipCode4 = 4321, if this fails, the query should look for ZipCode = 1111 && ZipCode4: 0000
Is there a way to do this in a single query ? or do I need to make 2 calls to my database ?
For matching both data set (11111/4321) and (11111/0000), you can use $or and $and with $elemMatch like the following :
db.test.find({
$or: [{
$and: [{
"Items": {
$elemMatch: { "ZipCode": "11111" }
}
}, {
"Items": {
$elemMatch: { "ZipCode4": "4321" }
}
}]
}, {
$and: [{
"Items": {
$elemMatch: { "ZipCode": "11111" }
}
}, {
"Items": {
$elemMatch: { "ZipCode4": "0000" }
}
}]
}]
})
As you want conditional staging, this is not possible but we can get closer to it like this :
db.test.aggregate([{
$match: {
$or: [{
$and: [{ "Items.ZipCode": "11111" }, { "Items.ZipCode4": "4321" }]
}, {
$and: [{ "Items.ZipCode": "11111" }, { "Items.ZipCode4": "0000" }]
}]
}
}, {
$project: {
Items: 1,
match: {
"$map": {
"input": "$Items",
"as": "val",
"in": {
"$cond": [
{ $and: [{ "$eq": ["$$val.ZipCode", "11111"] }, { "$eq": ["$$val.ZipCode4", "4321"] }] },
true,
false
]
}
}
}
}
}, {
$unwind: "$match"
}, {
$group: {
_id: "$match",
data: {
$push: {
_id: "$_id",
Items: "$Items"
}
}
}
}])
The first $match is for selecting only the items we need
The $project will build a new field that check if this items is from the 1st set of data (11111/4321) or the 2nd set of data (11111/0000).
The $unwind is used to remove the array generated by $map.
The $group group by set of data
So in the end you will have an output like the following :
{ "_id" : true, "data" : [ { "_id" : ObjectId("58af69ac594b51730a394972"), "Items" : [ { "ZipCode" : "11111", "ZipCode4" : "4321" } ] }, { "_id" : ObjectId("58af69ac594b51730a394974"), "Items" : [ { "ZipCode" : "11111", "ZipCode4" : "4321" } ] } ] }
{ "_id" : false, "data" : [ { "_id" : ObjectId("58af69ac594b51730a394971"), "Items" : [ { "ZipCode" : "11111", "ZipCode4" : "0000" } ] } ] }
Your application logic can check if there is _id:true in this output array, just take the corresponding data field for _id:true. If there is _id:false in this object take the corresponding data field for _id:false.
In the last $group, you can also use $addToSet to builds 2 field data1 & data2 for both type of data set but this will be painful to use as it will add null object to the array for each one of the opposite type :
"$addToSet": {
"$cond": [
{ "$eq": ["$_id", true] },
"$data",
null
]
}
Here is a gist