MongoDB - Aggregate get specific objects in an array - mongodb

How can I get only objects in the sales array matching with 2021-10-14 date ?
My aggregate query currently returns all objects of the sales array if at least one is matching.
Dataset Documents
{
"name": "#0",
"sales": [{
"date": "2021-10-14",
"price": 3.69,
},{
"date": "2021-10-15",
"price": 2.79,
}]
},
{
"name": "#1",
"sales": [{
"date": "2021-10-14",
"price": 1.5,
}]
}
Aggregate
{
$match: {
sales: {
$elemMatch: {
date: '2021-10-14',
},
},
},
},
{
$group: {
_id: 0,
data: {
$push: '$sales',
},
},
},
{
$project: {
data: {
$reduce: {
input: '$data',
initialValue: [],
in: {
$setUnion: ['$$value', '$$this'],
},
},
},
},
}
Result
{"date": "2021-10-14","price": 3.69},
{"date": "2021-10-15","price": 2.79},
{"date": "2021-10-14","price": 1.5}
Result Expected
{"date": "2021-10-14","price": 3.69},
{"date": "2021-10-14","price": 1.5}

You actually need to use a $replaceRoot or $replaceWith pipeline which takes in an expression that gives you the resulting document filtered using $arrayElemAt (or $first) and $filter from the sales array:
[
{ $match: { 'sales.date': '2021-10-14' } },
{ $replaceWith: {
$arrayElemAt: [
{
$filter: {
input: '$sales',
cond: { $eq: ['$$this.date', '2021-10-14'] }
}
},
0
]
} }
]
OR
[
{ $match: { 'sales.date': '2021-10-14' } },
{ $replaceRoot: {
newRoot: {
$arrayElemAt: [
{
$filter: {
input: '$sales',
cond: { $eq: ['$$this.date', '2021-10-14'] }
}
},
0
]
}
} }
]
Mongo Playground

In $project stage, you need $filter operator with input as $reduce operator to filter the documents.
{
$project: {
data: {
$filter: {
input: {
$reduce: {
input: "$data",
initialValue: [],
in: {
$setUnion: [
"$$value",
"$$this"
],
}
}
},
cond: {
$eq: [
"$$this.date",
"2021-10-14"
]
}
}
}
}
}
Sample Mongo Playground

How about using $unwind:
.aggregate([
{$match: { sales: {$elemMatch: {date: '2021-10-14'} } }},
{$unwind: '$sales'},
{$match: {'sales.date': '2021-10-14'}},
{$project: {date: '$sales.date', price: '$sales.price', _id: 0}}
])
This will separate the sales into different documents, each containing only one sale, and allow you to match conditions easily.
See: https://docs.mongodb.com/manual/reference/operator/aggregation/unwind/

Related

how to use $match after $group in mongodb aggregation

I have 4 products. I want to know the count of product-4 for users who has product-1 or product-2
Sample data:
[
{
"user_id": 1,
"product_type": "product-1"
},
{
"user_id": 1,
"product_type": "product-4"
},
{
"user_id": 1,
"product_type": "product-4"
},
{
"user_id": 2,
"product_type": "product-1"
}
]
user-1 has two product-4 and one product-1 (that counts 2)
user-2 has only product-1, but no product-4 (hence that does not count)
This is how I tried
db.collection.aggregate([
{
$match: {
product_type: {
$in: [
"product-1​",
"product-2",
],
},
},
},
{
$group: {
_id: "$user_id",
},
},
{
$match: {
user_id: { $in: "$_id"}, // I want to use $group's result in here
product_type: "product-4",
},
}
]);
Expected results are:
[
{
"_id": 1,
"count": 2
},
{
"_id": 2,
"count": 0
}
]
Note:
I dont have a backend, I have to this using mongodb only.
Does this answer your question?
db.collection.aggregate([
{$group: {_id: "$user_id", data: {$push: "$product_type"}}},
{$match: {$expr: {$or: [
{$in: ["product-1", "$data"]},
{$in: ["product-2", "$data"]}
]}}},
{$project: {
count: {
$size: {
$filter: {
input: "$data",
cond: {$eq: ["$$this", "product-4"]}
}
}
}
}}
])
See how it works on the playground example

MongoDB compare endTime with startTime of next document

I have a similar collection where I have sort them by their startTime:
{"name": 'A', "startTime": '1634626355', "endTime": '1634631405'}
{"name": 'A', "startTime": '1634631406', "endTime": '1634631864'}
{"name": 'A', "startTime": '1634631865', "endTime": '1634656048'}
{"name": 'A', "startTime": '1634712642', "endTime": '1634718856'}
How can I compare the documents such that if the document endTime and the next document startTime duration is less than 5 minutes, merge it.
This is the result I'm trying to achieve (The 1st 3 documents are merged into 1 where it uses the startTime of the 1st document and the endTime of the 3rd document):
{"name": 'A', "startTime": '1634626355', "endTime": '1634656048'}
{"name": 'A', "startTime": '1634712642', "endTime": '1634718856'}
Thanks
First of all, you should never store date/time values as string, it's a design flaw. Store always proper Date object.
This solution works without self-lookup, so it may perform better:
db.collection.aggregate([
{
$set: {
startDateTime: { $toDate: { $multiply: ["$startTime", 1000] } },
endDateTime: { $toDate: { $multiply: ["$endTime", 1000] } }
},
},
{ $sort: { startDateTime: 1 } },
{ $group: { _id: null, data: { $push: "$$ROOT" } } },
{
$set: {
data: {
$reduce: {
input: "$data",
initialValue: [],
in: {
$cond: {
if: {
$or: [
{ $eq: [{ $size: "$$value" }, 0] }, // for the initail element
{
$gt: [
{
$dateDiff: { // calculate difference
endDate: "$$this.startDateTime",
startDate: { $last: "$$value.endDateTime" },
unit: "minute"
}
},
5 // more than 5 Minutes
]
}
]
},
then: { $concatArrays: ["$$value", ["$$this"]] }, // append new element
else: {
$map: {
input: "$$value",
as: "data",
in: {
$cond: {
if: { $eq: ["$$data._id", { $last: "$$value._id" }] }, // find last element
then: { // update last element
$mergeObjects: [
"$$data",
{ endDateTime: "$$this.endDateTime" },
{ endTime: "$$this.endTime" }
]
},
else: "$$data"
}
}
}
}
}
}
}
}
}
},
// some cosmetic
{ $unwind: "$data" },
{ $replaceRoot: { newRoot: "$data" } }
])
Mongo Playground
You can use $lookup in an aggregation pipeline to find out the documents that you need to remove. Then, perform a forEach to remove them.
db.collection.aggregate([
{
$addFields: {
endDateTime: {
"$toDate": {
"$multiply": [
{
$toLong: "$endTime"
},
1000
]
}
}
},
},
{
"$lookup": {
"from": "collection",
let: {
end: "$endDateTime"
},
pipeline: [
{
"$addFields": {
startDateTime: {
"$toDate": {
"$multiply": [
{
$toLong: "$startTime"
},
1000
]
}
}
}
},
{
$match: {
$expr: {
$and: [
{
$lte: [
{
$subtract: [
"$startDateTime",
"$$end"
]
},
300000
]
},
{
$lte: [
"$$end",
"$startDateTime"
]
}
]
}
}
}
],
"as": "lessThan5min"
}
},
{
"$unwind": "$lessThan5min"
},
{
"$replaceRoot": {
"newRoot": "$lessThan5min"
}
}
]).forEach(function(doc){
db.collection.remove({ "_id": doc._id });
});
Here is the Mongo playground to find out the documents that you need to remove for your reference.

MongoDB count occurances with group and unwind

I have a MongoDB database with the following document structure:
{
"name": "ServiceA",
"areas": ["X", "Y", "Z"],
"tags": [
{
"name": "Financial",
"type": "A"
},
{
"name": "Consumer",
"type": "B"
}
]
}
There's many entries each with the same structure. Containing the same areas.
There's many predefined tag names, sorted into a few types.
The aim is to group by area and then count the number of occurrences of each tag. So an output like this:
{
"area": "X",
"count": 100, // Total entries with X as an area
"tagNameCount": {
"Financial": 20,
"Consumer": 10,
...
},
"tagTypeCount": {
"A": 70,,
"B: 40
}
}
I've been starting of using $unwind on areas, but it's the next steps from there I'm stuck on. I get that I need to use $group, but I can't work out how to count occurrences.
You may use $facet operator which allows perform several aggregation in one.
Walkthrough
1. We $unwind by area and tags
2. With $facet, we perform 3 parallel aggregations:
2.1 We count unique areas
2.2 We count unique tag names for each area
2.3 We count unique tag type for each area
3. We join 2 parallel arrays by flatten areas
4. We assemble desired output
db.collection.aggregate([
{
$unwind: "$areas"
},
{
$unwind: "$tags"
},
{
$facet: {
areas: [
{
$group: {
_id: "$areas",
count: {
$addToSet: "$_id"
}
}
},
{
$project: {
_id: 0,
area: "$_id",
count: {
$size: "$count"
}
}
}
],
tagNameCount: [
{
$group: {
_id: {
name: "$tags.name",
areas: "$areas"
},
count: {
$addToSet: "$_id"
}
}
},
{
$group: {
_id: "$_id.areas",
tagNameCount: {
$push: {
k: "$_id.name",
v: {
$size: "$count"
}
}
}
}
},
{
$addFields: {
tagNameCount: {
$arrayToObject: "$tagNameCount"
}
}
}
],
tagTypeCount: [
{
$group: {
_id: {
type: "$tags.type",
areas: "$areas"
},
count: {
$addToSet: "$_id"
}
}
},
{
$group: {
_id: "$_id.areas",
tagTypeCount: {
$push: {
k: "$_id.type",
v: {
$size: "$count"
}
}
}
}
},
{
$addFields: {
tagTypeCount: {
$arrayToObject: "$tagTypeCount"
}
}
}
]
}
},
{
$unwind: "$areas"
},
{
$addFields: {
"tagNameCount": {
$filter: {
input: "$tagNameCount",
cond: {
$eq: [
"$areas.area",
"$$this._id"
]
}
}
},
"tagTypeCount": {
$filter: {
input: "$tagTypeCount",
cond: {
$eq: [
"$areas.area",
"$$this._id"
]
}
}
}
}
},
{
$project: {
area: "$areas.area",
count: "$areas.count",
tagNameCount: {
$arrayElemAt: [
"$tagNameCount.tagNameCount",
0
]
},
tagTypeCount: {
$arrayElemAt: [
"$tagTypeCount.tagTypeCount",
0
]
}
}
},
{
$sort: {
area: 1
}
}
])
MongoPlayground
Here's one method:
unwind both areas and tags
for each area collect the applicable tags, and the unique names and types
count the names to get the total number of tags
for each unique name, count the matching values in the tags
do the same for each unique type
project out the unique fields
db.collection.aggregate([
{$unwind: "$areas"},
{$unwind: "$tags"},
{$group: {
_id: "$areas",
names: {$push: "$tags.name"},
uniqueNames: {$addToSet: "$tags.name"},
types: {$push: "$tags.type"},
uniqueTypes: {$addToSet: "$tags.type"}
}},
{$addFields: {
count: {$size: "$names"},
names: {
$arrayToObject: {
$map: {
input: "$uniqueNames",
as: "needle",
in: {
k: "$$needle",
v: {
$size: {
$filter: {
input: "$names",
cond: {$eq: ["$$this","$$needle"]}
}}}}}}},
types: {
$arrayToObject: {
$map: {
input: "$uniqueTypes",
as: "needle",
in: {
k: "$$needle",
v: {$size: {
$filter: {
input: "$types",
cond: { $eq: [ "$$this","$$needle"]}
}}}}}}}}},
{
$project: {
uniqueNames: 0,
uniqueTypes: 0
}}
])
Playground

mongoDB updateMany based on nested array condition

I have following structure in users collection:
[
{ "name": "Ivan",
"payments": [
{"date": new Date("2019-01-01"), "details": [{"payment_system": "A", "spent": 95},
{"payment_system": "B", "spent": 123}]},
{"date": new Date("2019-01-03"), "details": [{"payment_system": "A", "spent": 12},
{"payment_system": "B", "spent": 11}]}]},
{ "name": "Mark",
"payments": [
{"date": new Date("2019-01-01"), "details": [{"payment_system": "D", "spent": 456},
{"payment_system": "B", "spent": 123}]},
{"date": new Date("2019-01-02"), "details": [{"payment_system": "A", "spent": 98},
{"payment_system": "C", "spent": 4}]}]}
]
Is it any way to add a field to users who spent more than, lets say 100 during the specific date range in specific payment system?
I tried updateMany, but have no idea how to filter "details" array element based on payment_system field.
For payment_system IN ("A", "C"), date >= "2019-01-02", spent_total >= 100 update should return
[
{ "name": "Ivan", ...},
{ "name": "Mark", "filter_passed": true, ... }
]
This this one:
db.collection.aggregate([
{
$set: {
payments: {
$filter: {
input: "$payments",
cond: { $gte: ["$$this.date", new Date("2019-01-02")] }
}
}
}
},
{
$set: {
spent_total: {
$reduce: {
input: "$payments.details.spent",
initialValue: [],
in: { $concatArrays: ["$$value", "$$this"] }
}
}
}
},
{ $set: { spent_total: { $sum: "$spent_total" } } },
{ $match: { "spent_total": { $gte: 100 } } }
])
Mongo Playground
Update:
Filter by payment_system is a bit longer. You have to $unwind and $group:
db.collection.aggregate([
{
$set: {
payments: {
$filter: {
input: "$payments",
cond: { $gte: ["$$this.date", new Date("2019-01-02")] }
}
}
}
},
{ $unwind: "$payments" },
{
$set: {
"payments.details": {
$filter: {
input: "$payments.details",
cond: { $in: ["$$this.payment_system", ["A", "C"]] }
},
},
}
},
{
$group: {
_id: { _id: "$_id", name: "$name", },
payments: { $push: "$payments" }
}
},
{
$set: {
spent_total: {
$reduce: {
input: "$payments.details.spent",
initialValue: [],
in: { $concatArrays: ["$$value", "$$this"] }
}
}
}
},
{ $set: { spent_total: { $sum: "$spent_total" } } },
{ $match: { "spent_total": { $gte: 100 } } },
{ // just some cosmetic
$project: {
_id: "$_id._id",
name: "$_id.name",
payments: 1
}
}
])
You cannot update your collection like db.collection.updateMany({}, [<the aggregation pipeline from above>]) because it contains $unwind and $group.
However, you can make $lookup or $out to save entire result into new collection.
If you need to sum up for each payment_system individually then try:
db.collection.aggregate([
{
$set: {
payments: {
$filter: {
input: "$payments",
cond: { $gte: ["$$this.date", new Date("2019-01-01")] }
}
}
}
},
{ $unwind: "$payments" },
{
$set: {
"payments.details": {
$filter: {
input: "$payments.details",
cond: { $in: ["$$this.payment_system", ["A", "B","C"]] }
},
},
}
},
{ $unwind: "$payments.details" },
{
$group: {
_id: {
_id: "$_id",
name: "$name",
payments: "$payments.details.payment_system"
},
spent_total: { $sum: "$payments.details.spent" }
}
},
{ $match: { "spent_total": { $gte: 100 } } },
{
$project: {
_id: "$_id._id",
name: "$_id.name",
payments: "$_id.payments",
spent_total: 1
}
}
])

$group inner array values without $unwind

I want to group objects in the array by same value for specified field and produce a count.
I have the following mongodb document (non-relevant fields are not present).
{
arrayField: [
{ fieldA: value1, ...otherFields },
{ fieldA: value2, ...otherFields },
{ fieldA: value2, ...otherFields }
],
...otherFields
}
The following is what I want.
{
arrayField: [
{ fieldA: value1, ...otherFields },
{ fieldA: value2, ...otherFields },
{ fieldA: value2, ...otherFields }
],
newArrayField: [
{ fieldA: value1, count: 1 },
{ fieldA: value2, count: 2 },
],
...otherFields
}
Here I grouped embedded documents by fieldA.
I know how to do it with unwind and 2 group stages the following way. (irrelevant stages are ommited)
Concrete example
// document structure
{
_id: ObjectId(...),
type: "test",
results: [
{ choice: "a" },
{ choice: "b" },
{ choice: "a" }
]
}
db.test.aggregate([
{ $match: {} },
{
$unwind: {
path: "$results",
preserveNullAndEmptyArrays: true
}
},
{
$group: {
_id: {
_id: "$_id",
type: "$type",
choice: "$results.choice",
},
count: { $sum: 1 }
}
},
{
$group: {
_id: {
_id: "$_id._id",
type: "$_id.type",
result: "$results.choice",
},
groupedResults: { $push: { count: "$count", choice: "$_id.choice" } }
}
}
])
You can use below aggregation
db.test.aggregate([
{ "$addFields": {
"newArrayField": {
"$map": {
"input": { "$setUnion": ["$arrayField.fieldA"] },
"as": "m",
"in": {
"fieldA": "$$m",
"count": {
"$size": {
"$filter": {
"input": "$arrayField",
"as": "d",
"cond": { "$eq": ["$$d.fieldA", "$$m"] }
}
}
}
}
}
}
}}
])
The below adds a new array field, which is generated by:
Using $setUnion to get unique set of array items, with inner $map to
extract only the choice field
Using $map on the unique set of items,
with inner $reduce on the original array, to sum all items where
choice matches
Pipeline:
db.test.aggregate([{
$addFields: {
newArrayField: {
$map: {
input: {
$setUnion: [{
$map: {
input: "$results",
in: { choice: "$$this.choice" }
}
}
]
},
as: "i",
in: {
choice: '$$i.choice',
count: {
$reduce: {
input: "$results",
initialValue: 0,
in: {
$sum: ["$$value", { $cond: [ { $eq: [ "$$this.choice", "$$i.choice" ] }, 1, 0 ] }]
}
}
}
}
}
}
}
}])
The $reduce will iterate over the results array n times, where n is the number of unique values of choice, so the performance will depend on that.