How to use $mergeObjects to merge arrays corresponding to existing groups? - mongodb

I'm trying to merge two arrays in my aggregation pipeline. After performing $facet, my MongoDB document has this format:
{
"final": [
{
"key": "TP-1",
"status_map": [
{ "status": "Closed", "final": [ "a", "b"]},
{ "status": "Done", "final": ["c","d" ] }
]
},
{
"key": "TP-2",
"status_map": [
{ "status": "Closed", "final": [ "x","y"] }
]
}
],
"start": [
{
"key": "TP-1",
"status_map": [
{ "status": "Closed", "start": [ "h"]},
{ "status": "Done", "start": ["a"]}
]
},
{
"key": "TP-2",
"status_map": [{ "status": "Done", "start": ["l","m"]}
]
}
]
}
Expected Output:
I need to merge final and start array corresponding to two groups:
Based on key and then
Based on status
{
"data": [
{
"key": "TP-1",
"status_map": [
{ "status": "Closed","final": ["a","b"],"start":["h"]},
{ "status": "Done","final": ["c","d"],"start":["a"]}
]
},
{
"key": "TP-2",
"status_map": [
{ "status": "Closed", "final":[ "x","y"],"start": []},
{ "status": "Done", "final": [ ],"start": [ "l","m"]}
]
}
]
}
How to achieve this use case?

There are several ways to approach this, not necessarily with $mergeObjects. But since you mentioned $mergeObjects this is one that uses it:
Note that, with this approach, we are merging objects of the same key and status, the values in the arrays will not get concatenated if the same key exists for multiple documents, The arrays will get replaced instead.
db.collection.aggregate([
{
$project: {
all: { $concatArrays: ["$final","$start"] }
}
},
{
$unwind: "$all"
},
{
$unwind: "$all.status_map"
},
{
$group: {
_id: {
_id: "$_id", // keep _id in $group to apply the group for each document, otherwise if you want to apply group on all documents, omit this
key: "$all.key",
status: "$all.status_map.status"
},
status_map: { $mergeObjects: "$$ROOT.all.status_map" }
}
},
{ // some data don't have start or end at all, we have to set a default empty array
$addFields: { // you can skip this stage if you allow data without start and final keys
"status_map.start": { $ifNull: ["$status_map.start", []] },
"status_map.final": { $ifNull: ["$status_map.final", []] }
}
},
{
$group: {
_id: { _id: "$_id._id", key: "$_id.key" },
key: { $first: "$_id.key" },
status_map: { $push: "$status_map" }
}
}
])
Mongo Playground

With no assumptions (for example for both keys to always appear) my strategy was to concat both arrays, unwind and finally group by the key.
db.collection.aggregate([
{
$project: {
concat: {
$concatArrays: [
"$final",
"$start"
]
}
}
},
{
$unwind: "$concat"
},
{
$unwind: "$concat.status_map"
},
{
$group: {
_id: {
k: "$concat.key",
status: "$concat.status_map.status"
},
final: {
$push: "$concat.status_map.final"
},
start: {
$push: "$concat.status_map.start"
}
}
},
{
$group: {
_id: "$_id.k",
status_map: {
$push: {
status: "$_id.status",
final: "$final",
start: "$start"
}
}
}
},
{
$project: {
key: "$_id",
status_map: 1,
_id: 0
}
}
])
Mongo Playground

Adding to #Tom Slabbaert's answer,
Mongo Playground
Here, final and start array is of format array of array. But It has to be simply an array.
It can be achieved by using $unwind on status_map and $reduce on status_map.final and status_map.start arrays.
Final query:
db.collection.aggregate([
{
$project: {
concat: {
$concatArrays: [
"$final",
"$start"
]
}
}
},
{
$unwind: "$concat"
},
{
$unwind: "$concat.status_map"
},
{
$group: {
_id: {
k: "$concat.key",
status: "$concat.status_map.status"
},
final: {
$push: "$concat.status_map.final"
},
start: {
$push: "$concat.status_map.start"
}
}
},
{
$group: {
_id: "$_id.k",
status_map: {
$push: {
status: "$_id.status",
final: "$final",
start: "$start"
}
}
}
},
{
$project: {
key: "$_id",
status_map: 1,
_id: 0
}
},
{
$unwind: "$status_map"
},
{
$project: {
key: 1,
"status_map.status": 1,
final: {
$reduce: {
input: "$status_map.final",
initialValue: [],
in: {
$concatArrays: [
"$$value",
"$$this"
]
}
}
},
start: {
$reduce: {
input: "$status_map.start",
initialValue: [],
in: {
$concatArrays: [
"$$value",
"$$this"
]
}
}
}
}
},
{
$group: {
_id: "$key",
status_map: {
$push: {
status: "$status_map.status",
final: "$final",
start: "$start"
}
}
}
}
])
Mongo Playground

Related

Lodash `countBy` equivalent in MongoDB?

Let's say I have the input docs below:
[
{
"_id": "6225ca4052e7c226e2dd836d",
"data": [
"07",
"07",
"12",
"19",
"07",
"32"
]
},
{
"_id": "6225ca4052e7c226e2dd888f",
"data": [
"99",
"97",
"52",
"99",
"58",
"92"
]
}
]
I want to count the occurrences of every element in data string array per document. In JS, I can use countBy. How can I achieve the same using MongoDB Aggregation Framework?
I have tried to $reduce but MongoDB seems to not support assigning dynamic field to object.
{
$reduce: {
input: '$data',
initialValue: {},
in: { // assign `$$this` with count to `$$value`, but failed! }
}
}
Below is the desired output.
[
{
"_id": "6225ca4052e7c226e2dd836d",
"freqs": {
"12": 1,
"19": 1,
"32": 1,
"07": 3
}
},
{
"_id": "6225ca4052e7c226e2dd888f",
"freqs": {
"52": 1,
"58": 1,
"92": 1,
"97": 1,
"99": 2
}
}
]
db.collection.aggregate([
{
$match: {}
},
{
$unwind: "$data"
},
{
$group: {
_id: "$data",
c: { $sum: 1 },
id: { $first: "$_id" }
}
},
{
$group: {
_id: "$id",
data: { $push: { k: "$_id", v: "$c" } }
}
},
{
$set: {
data: { $arrayToObject: "$data" }
}
}
])
mongoplayground
db.collection.aggregate([
{
$set: {
data: {
$function: {
body: "function(d) {let obj = {}; d.forEach(e => {if(obj[e]==null) { obj[e]=1; }else{ obj[e]++; }}); return obj;}",
args: [
"$data"
],
lang: "js"
}
}
}
}
])
mongoplayground

MongoDB compare endTime with startTime of next document

I have a similar collection where I have sort them by their startTime:
{"name": 'A', "startTime": '1634626355', "endTime": '1634631405'}
{"name": 'A', "startTime": '1634631406', "endTime": '1634631864'}
{"name": 'A', "startTime": '1634631865', "endTime": '1634656048'}
{"name": 'A', "startTime": '1634712642', "endTime": '1634718856'}
How can I compare the documents such that if the document endTime and the next document startTime duration is less than 5 minutes, merge it.
This is the result I'm trying to achieve (The 1st 3 documents are merged into 1 where it uses the startTime of the 1st document and the endTime of the 3rd document):
{"name": 'A', "startTime": '1634626355', "endTime": '1634656048'}
{"name": 'A', "startTime": '1634712642', "endTime": '1634718856'}
Thanks
First of all, you should never store date/time values as string, it's a design flaw. Store always proper Date object.
This solution works without self-lookup, so it may perform better:
db.collection.aggregate([
{
$set: {
startDateTime: { $toDate: { $multiply: ["$startTime", 1000] } },
endDateTime: { $toDate: { $multiply: ["$endTime", 1000] } }
},
},
{ $sort: { startDateTime: 1 } },
{ $group: { _id: null, data: { $push: "$$ROOT" } } },
{
$set: {
data: {
$reduce: {
input: "$data",
initialValue: [],
in: {
$cond: {
if: {
$or: [
{ $eq: [{ $size: "$$value" }, 0] }, // for the initail element
{
$gt: [
{
$dateDiff: { // calculate difference
endDate: "$$this.startDateTime",
startDate: { $last: "$$value.endDateTime" },
unit: "minute"
}
},
5 // more than 5 Minutes
]
}
]
},
then: { $concatArrays: ["$$value", ["$$this"]] }, // append new element
else: {
$map: {
input: "$$value",
as: "data",
in: {
$cond: {
if: { $eq: ["$$data._id", { $last: "$$value._id" }] }, // find last element
then: { // update last element
$mergeObjects: [
"$$data",
{ endDateTime: "$$this.endDateTime" },
{ endTime: "$$this.endTime" }
]
},
else: "$$data"
}
}
}
}
}
}
}
}
}
},
// some cosmetic
{ $unwind: "$data" },
{ $replaceRoot: { newRoot: "$data" } }
])
Mongo Playground
You can use $lookup in an aggregation pipeline to find out the documents that you need to remove. Then, perform a forEach to remove them.
db.collection.aggregate([
{
$addFields: {
endDateTime: {
"$toDate": {
"$multiply": [
{
$toLong: "$endTime"
},
1000
]
}
}
},
},
{
"$lookup": {
"from": "collection",
let: {
end: "$endDateTime"
},
pipeline: [
{
"$addFields": {
startDateTime: {
"$toDate": {
"$multiply": [
{
$toLong: "$startTime"
},
1000
]
}
}
}
},
{
$match: {
$expr: {
$and: [
{
$lte: [
{
$subtract: [
"$startDateTime",
"$$end"
]
},
300000
]
},
{
$lte: [
"$$end",
"$startDateTime"
]
}
]
}
}
}
],
"as": "lessThan5min"
}
},
{
"$unwind": "$lessThan5min"
},
{
"$replaceRoot": {
"newRoot": "$lessThan5min"
}
}
]).forEach(function(doc){
db.collection.remove({ "_id": doc._id });
});
Here is the Mongo playground to find out the documents that you need to remove for your reference.

Mongodb loop through every distinct values and select tags using aggregate (facet)

I have collection like this:
{
"labels": [{
"description": "Dog"
}, {
"description": "Red"
}, {
"description": "XXX"
}]
}
{
"labels": [{
"description": "Cat"
}, {
"description": "XXX"
}, {
"description": "Yellow"
}]
}
{
"labels": [{
"description": "Dog"
}, {
"description": "Red"
}, {
"description": "Yellow"
}]
}
{
"labels": [{
"description": "Bird"
}, {
"description": "XXX"
}, {
"description": "XXX"
}]
}
I want to filter for example only "Red" and "Yellow" colors from ALL elements and output document like this:
// because "Dog" appears 2 times so total = 2
{
description: "Dog",
total: 2,
colors: [
{ "_id": "Red", total: 2 },
{ "_id": "Yellow", total: 1 }
]
}
{
description: "Cat",
total: 1,
colors: [
{ "_id": "Yellow", total: 1 }
]
}
{
description: "Bird",
total: 1,
colors: []
}
{
description: "Red",
total: 2,
colors: [
{ _id: "Yellow", total: 1 }
]
}
{
description: "XXX",
total: 4,
colors: [
{ _id: "Yellow", total: 1 }
]
}
I can do this by using collection.distinct('labels.description') and then iterating through every single element + make a separate collection.count({ 'labels.description': 'Dog' }) like this:
for (...)
db.collection.aggregate([
{
"$match": {
"labels.description": valueFromLoop // (e.g. Dog)
}
},
{ $unwind : "$labels" },
{
"$group": {
"_id": "$labels.description",
"count": { "$sum": 1 }
}
},
{
"$match": {
"$or": [
{ "_id": "Red" },
{ "_id": "Yellow" }
]
}
},
{
"$sort": {
"count": -1
}
}
])
I want to do this in a single aggregation or mapReduce so that I could easily output it to new collection using $out instead of using Bulk operations separately, however I don't know if it's possible.
Try this:
let filter = ["Red", "Yellow"];
db.testcollection.aggregate([
{
$addFields: { bkp: "$labels" }
},
{ $unwind: "$labels" },
{
$addFields: {
bkp: {
$filter: {
input: "$bkp",
as: "item",
cond: {
$and: [
{ $ne: ["$$item.description", "$labels.description"] },
{ $in: ["$$item.description", filter] }
]
}
}
}
}
},
{
$unwind: {
path: "$bkp",
preserveNullAndEmptyArrays: true
}
},
{
$group: {
_id: {
key1: "$labels.description",
key2: { $ifNull: ["$bkp.description", false] }
},
total: { $sum: 1 }
}
},
{
$group: {
_id: "$_id.key1",
description: { $first: "$_id.key1" },
total: {
$sum: {
$cond: {
if: { $first: [["$_id.key2"]] },
then: 1,
else: "$total"
}
}
},
colors: {
$push: {
$cond: {
if: { $first: [["$_id.key2"]] },
then: {
_id: "$_id.key2",
total: "$total"
},
else: "$$REMOVE"
}
}
}
}
},
{ $project: { _id: 0 } }
]);
For some reason with code from both answers it does not count all tags properly.
I'm posting what works:
db.collection.aggregate([
{
$project: {
labels: 1,
result: {
$filter: {
input: "$labels",
as: "label",
cond: {
$or: [
{ $eq: ["$$label.description", "Blue"] },
{ $eq: ["$$label.description", "Red"] },
{ $eq: ["$$label.description", "Black-and-white"] },
{ $eq: ["$$label.description", "Purple"] },
{ $eq: ["$$label.description", "Orange"] },
{ $eq: ["$$label.description", "Yellow"] },
{ $eq: ["$$label.description", "Green"] },
{ $eq: ["$$label.description", "Teal"] }
]
}
}
}
}
},
{
$unwind: "$labels"
},
{
"$group": {
_id: "$labels.description",
x: {
$push: "$result.description"
},
total: { "$sum": 1 }
}
},
{
$project: {
x: {
$reduce: {
input: '$x',
initialValue: [],
in: {$concatArrays: ['$$value', '$$this']}
}
},
total: 1
}
},
{
$project: {
x: 1,
y: { $setUnion: "$x" },
total: 1
}
},
{
$project: {
_id: 0,
description: "$_id",
"colors": {
$map: {
input: "$y",
as: "item",
in: {
_id: "$$item",
count: {
$size: {
$filter: {
input: "$x",
as: "itemx",
cond: {
$eq: ["$$item", "$$itemx"]
}
}
}
}
}
}
},
total: 1
}
},
{
$out: "backgrounds_meta"
}
])
db.test2.aggregate([
{
$project: {
labels:1,
colours: {
$filter: {
input: "$labels",
as: "label",
cond: {
$or: [
{$eq:["Yellow","$$label.description"]},
{$eq:["Red", "$$label.description"]}
]
}
}
}
}
},
{$unwind:"$labels"},
{$group:{
_id: "$labels.description",
total: {$sum:1},
colours: {$addToSet:"$colours.description"}
}},
{
$project:{
_id:0,
description:"$_id",
total:1,
colours: {
$reduce:{
input: "$colours",
initialValue: [],
in: {$concatArrays: ["$$value", "$$this"]}
}
}
}
},
{
$unwind: {
path:"$colours",preserveNullAndEmptyArrays: true
}
},
{
$group:{
_id:{
description:"$description",
total:"$total",
colour:"$colours"
},
count: {
$sum: {$cond:[{$ifNull:["$colours",false]},1,0]}
}
}
},
{
$group:{
_id:{
description:"$_id.description",
total:"$_id.total"
},
colours: {
$push: {
$cond: [{$gt:["$count",0]},
{
"_id":"$_id.colour",
total:"$count"
},
"$$REMOVE"
]
}
}
}
},
{
$project: {
_id:0,
description: "$_id.description",
total: "$_id.total",
colours: 1
}
}
]);
**Edit In your answer, you are missing the Yellows for Red and Dog because you are taking the first item from $result with $arrayElemAt: ["$result.description", 0].
If description is a colour, do you also want to include the counts for itself in colours?
Never mind, you've updated the answer

Mongo Query to fetch distinct nested documents

I need to fetch distinct nested documents.
Please find the sample document:
{
"propertyId": 1001820437,
"date": ISODate("2020-07-17T00:00:00.000Z"),
"HList":[
{
"productId": 123,
"name": "Dubai",
"tsh": true
}
],
"PList":[
{
"productId": 123,
"name": "Dubai",
"tsh": false
},
{
"productId": 234,
"name": "India",
"tsh": true
}
],
"CList":[
{
"productId": 234,
"name": "India",
"tsh": false
}
]
}
Expected result is:
{
"produts":[
{
"productId": 123,
"name": "Dubai"
},
{
"productId": 234,
"name": "India"
}
]
}
I tried with this query:
db.property.aggregate([
{
$match: {
"propertyId": 1001820437,
"date": ISODate("2020-07-17T00:00:00.000Z")
}
},
{
"$project": {
"_id": 0,
"unique": {
"$filter": {
"input": {
"$setDifference": [
{
"$concatArrays": [
"$HList.productId",
"$PList.productId",
"$CList.productId"
]
},
[]
]
},
"cond": {
"$ne": [ "$$this", "" ]
}
}
}
}
}
]);
Is $setDifference aggregation is correct choice here?
My query returns only unique product ids but i need a productId with name.
Could someone help me to solve this?
Thanks in advance
You can use $projectfirst to get rid of tsh field and then run $setUnion which ignores duplicated entries:
db.collection.aggregate([
{
$project: {
"HList.tsh": 0,
"PList.tsh": 0,
"CList.tsh": 0,
}
},
{
$project: {
products: {
$setUnion: [ "$HList", "$PList", "$CList" ]
}
}
}
])
Mongo Playground
The following two aggregations return the expected and same result (you can use any of the two):
db.collection.aggregate( [
{
$project: {
_id: 0,
products: {
$reduce: {
input: { $setUnion: [ "$HList", "$PList", "$CList" ] },
initialValue: [],
in: {
$setUnion: [ "$$value", [ { productId: "$$this.productId", name: "$$this.name" } ] ]
}
}
}
}
}
] )
This one is little verbose:
db.collection.aggregate( [
{
$project: { list: { $setUnion: [ "$HList", "$PList", "$CList" ] } }
},
{
$unwind: "$list"
},
{
$group: {
_id: null,
products: { $addToSet: { "productId": "$list.productId", "name": "$list.name" } }
}
},
{
$project: { _id: 0 }
}
] )
db.collection.aggregate([
{
$match: {
"propertyId": 1001820437,
"date": ISODate("2020-07-17T00:00:00.000Z")
}
},
{
$project: {
products: {
$filter: {
input: { "$setUnion" : ["$CList", "$HList", "$PList"] },
as: 'product',
cond: {}
}
}
}
},
{
$project: {
"_id":0,
"products.tsh": 1,
"products.name": 1,
}
},
])

Group on field while getting the last document for each field with MongoDB

Problem
I'm trying to group a stock inventory by products. At first, my stock entries was fully filled each time so I made this aggregate:
[
{ $sort: { date: 1 } },
{
$group: {
_id: '$userId',
stocks: { $last: '$stocks' },
},
},
{ $unwind: '$stocks' },
{
$group: {
_id: '$stocks.productId',
totalQuantity: { $sum: '$stocks.quantity' },
stocks: { $push: { userId: '$_id', quantity: '$stocks.quantity' } },
},
},
]
Now, it can be possible that a stock entry doesn't contain all the products filled. So I'm stuck while writing the new aggregate.
Basically I need to group every products by productId and have an array of the last entry for each user.
Output
This is my expected output:
[
{
"_id": ObjectId("5e75eae1359fc8159d5b6073"),
"totalQuantity": 33,
"stocks": [
{
"userId": ObjectId("5e75f498359fc8159d5b6075"),
"lastDate": "2020-03-21T11:45:53.077Z",
"quantity": 33
}
]
},
{
"_id": ObjectId("5e75eaea359fc8159d5b6074"),
"totalQuantity": 2,
"stocks": [
{
"userId": ObjectId("5e75f498359fc8159d5b6075"),
"lastDate": "2020-03-21T11:45:53.077Z",
"quantity": 2
}
]
}
]
Documents
Documents (when fully filled):
{
"_id": ObjectId("5e75fe71e4a3e0323ba47e0a"),
"date": "2020-03-21T11:45:53.077Z",
"userId": ObjectId("5e75f498359fc8159d5b6075"),
"stocks": [
{
"productId": ObjectId("5e75eae1359fc8159d5b6073"),
"quantity": 33
},
{
"productId": ObjectId("5e75eaea359fc8159d5b6074"),
"quantity": 2
}
]
}
Sometimes it won't be filled for the whole inventory (that's why I need the lastDate):
{
"_id": ObjectId("5e75fe71e4a3e0323ba47e0a"),
"date": "2020-03-21T11:45:53.077Z",
"userId": ObjectId("5e75f498359fc8159d5b6075"),
"stocks": [
{
"productId": ObjectId("5e75eae1359fc8159d5b6073"),
"quantity": 33
}
]
}
Try this one:
db.collection.aggregate([
{
$group: {
_id: "$userId",
root: {
$push: "$$ROOT"
}
}
},
{
$addFields: {
root: {
$map: {
input: "$root",
as: "data",
in: {
"stocks": {
$map: {
input: "$$data.stocks",
as: "stock",
in: {
"productId": "$$stock.productId",
"userId": "$$data.userId",
"quantity": "$$stock.quantity",
"lastDate": "$$data.date"
}
}
}
}
}
}
}
},
{
$unwind: "$root"
},
{
$replaceRoot: {
newRoot: "$root"
}
},
{
$unwind: "$stocks"
},
{
$sort: {
"stocks.lastDate": 1
}
},
{
$group: {
_id: "$stocks.productId",
totalQuantity: {
$last: "$stocks.quantity"
},
stocks: {
$last: "$stocks"
}
}
},
{
$addFields: {
stocks: [
{
"lastDate": "$stocks.lastDate",
"quantity": "$stocks.quantity",
"userId": "$stocks.userId"
}
]
}
}
])
MongoPlayground