I want to group objects in the array by same value for specified field and produce a count.
I have the following mongodb document (non-relevant fields are not present).
{
arrayField: [
{ fieldA: value1, ...otherFields },
{ fieldA: value2, ...otherFields },
{ fieldA: value2, ...otherFields }
],
...otherFields
}
The following is what I want.
{
arrayField: [
{ fieldA: value1, ...otherFields },
{ fieldA: value2, ...otherFields },
{ fieldA: value2, ...otherFields }
],
newArrayField: [
{ fieldA: value1, count: 1 },
{ fieldA: value2, count: 2 },
],
...otherFields
}
Here I grouped embedded documents by fieldA.
I know how to do it with unwind and 2 group stages the following way. (irrelevant stages are ommited)
Concrete example
// document structure
{
_id: ObjectId(...),
type: "test",
results: [
{ choice: "a" },
{ choice: "b" },
{ choice: "a" }
]
}
db.test.aggregate([
{ $match: {} },
{
$unwind: {
path: "$results",
preserveNullAndEmptyArrays: true
}
},
{
$group: {
_id: {
_id: "$_id",
type: "$type",
choice: "$results.choice",
},
count: { $sum: 1 }
}
},
{
$group: {
_id: {
_id: "$_id._id",
type: "$_id.type",
result: "$results.choice",
},
groupedResults: { $push: { count: "$count", choice: "$_id.choice" } }
}
}
])
You can use below aggregation
db.test.aggregate([
{ "$addFields": {
"newArrayField": {
"$map": {
"input": { "$setUnion": ["$arrayField.fieldA"] },
"as": "m",
"in": {
"fieldA": "$$m",
"count": {
"$size": {
"$filter": {
"input": "$arrayField",
"as": "d",
"cond": { "$eq": ["$$d.fieldA", "$$m"] }
}
}
}
}
}
}
}}
])
The below adds a new array field, which is generated by:
Using $setUnion to get unique set of array items, with inner $map to
extract only the choice field
Using $map on the unique set of items,
with inner $reduce on the original array, to sum all items where
choice matches
Pipeline:
db.test.aggregate([{
$addFields: {
newArrayField: {
$map: {
input: {
$setUnion: [{
$map: {
input: "$results",
in: { choice: "$$this.choice" }
}
}
]
},
as: "i",
in: {
choice: '$$i.choice',
count: {
$reduce: {
input: "$results",
initialValue: 0,
in: {
$sum: ["$$value", { $cond: [ { $eq: [ "$$this.choice", "$$i.choice" ] }, 1, 0 ] }]
}
}
}
}
}
}
}
}])
The $reduce will iterate over the results array n times, where n is the number of unique values of choice, so the performance will depend on that.
Related
How can I get only objects in the sales array matching with 2021-10-14 date ?
My aggregate query currently returns all objects of the sales array if at least one is matching.
Dataset Documents
{
"name": "#0",
"sales": [{
"date": "2021-10-14",
"price": 3.69,
},{
"date": "2021-10-15",
"price": 2.79,
}]
},
{
"name": "#1",
"sales": [{
"date": "2021-10-14",
"price": 1.5,
}]
}
Aggregate
{
$match: {
sales: {
$elemMatch: {
date: '2021-10-14',
},
},
},
},
{
$group: {
_id: 0,
data: {
$push: '$sales',
},
},
},
{
$project: {
data: {
$reduce: {
input: '$data',
initialValue: [],
in: {
$setUnion: ['$$value', '$$this'],
},
},
},
},
}
Result
{"date": "2021-10-14","price": 3.69},
{"date": "2021-10-15","price": 2.79},
{"date": "2021-10-14","price": 1.5}
Result Expected
{"date": "2021-10-14","price": 3.69},
{"date": "2021-10-14","price": 1.5}
You actually need to use a $replaceRoot or $replaceWith pipeline which takes in an expression that gives you the resulting document filtered using $arrayElemAt (or $first) and $filter from the sales array:
[
{ $match: { 'sales.date': '2021-10-14' } },
{ $replaceWith: {
$arrayElemAt: [
{
$filter: {
input: '$sales',
cond: { $eq: ['$$this.date', '2021-10-14'] }
}
},
0
]
} }
]
OR
[
{ $match: { 'sales.date': '2021-10-14' } },
{ $replaceRoot: {
newRoot: {
$arrayElemAt: [
{
$filter: {
input: '$sales',
cond: { $eq: ['$$this.date', '2021-10-14'] }
}
},
0
]
}
} }
]
Mongo Playground
In $project stage, you need $filter operator with input as $reduce operator to filter the documents.
{
$project: {
data: {
$filter: {
input: {
$reduce: {
input: "$data",
initialValue: [],
in: {
$setUnion: [
"$$value",
"$$this"
],
}
}
},
cond: {
$eq: [
"$$this.date",
"2021-10-14"
]
}
}
}
}
}
Sample Mongo Playground
How about using $unwind:
.aggregate([
{$match: { sales: {$elemMatch: {date: '2021-10-14'} } }},
{$unwind: '$sales'},
{$match: {'sales.date': '2021-10-14'}},
{$project: {date: '$sales.date', price: '$sales.price', _id: 0}}
])
This will separate the sales into different documents, each containing only one sale, and allow you to match conditions easily.
See: https://docs.mongodb.com/manual/reference/operator/aggregation/unwind/
I am new to mongodb and wanted to convert my array to object using pipeline. For example,
{
field1: [1,2,3,4,5],
field2: [‘a’,’b’,’c’,’d’,’e’],
}
I want the above document to be converted to,
{
fields: [
{
field1: 1,
field2: ‘a’
},
......
{
field1: 5,
field2: ‘e’
}
]
}
Any idea how I can achieve this?
You can use $unwind to separate your arrays.
And then format your new list with $project without forgetting to remove the duplicates created by the $unwind.
db.collection.aggregate({
"$unwind": {
path: "$field1",
includeArrayIndex: "field1_index"
}
},
{
"$unwind": {
"path": "$field2",
"includeArrayIndex": "field2_index"
}
},
{
"$project": {
"fields": {
"field1": "$field1",
"field2": "$field2"
},
"diff": {
$cmp: [
"$field1_index",
"$field2_index"
]
}
}
},
{
"$match": {
"diff": 0
}
},
{
$group: {
_id: "$_id",
fields: {
$push: "$fields"
}
}
})
Try it here
You can use $zip and $map and $reduce to achieve this:
db.collection.aggregate([
{
"$addFields": {
fields: {
$reduce: {
input: {
$zip: {
inputs: [
{
$map: {
input: "$field1",
as: "f1",
in: {
field1: "$$f1"
}
}
},
{
$map: {
input: "$field2",
as: "f2",
in: {
field2: "$$f2"
}
}
}
]
}
},
initialValue: [],
in: {
"$concatArrays": [
[
{
"$mergeObjects": "$$this"
}
],
"$$value"
]
}
}
}
}
}
])
MongoPlayground
Make sure both field1 and field2 are of equal length or you will lose some data.
I have a MongoDB database with the following document structure:
{
"name": "ServiceA",
"areas": ["X", "Y", "Z"],
"tags": [
{
"name": "Financial",
"type": "A"
},
{
"name": "Consumer",
"type": "B"
}
]
}
There's many entries each with the same structure. Containing the same areas.
There's many predefined tag names, sorted into a few types.
The aim is to group by area and then count the number of occurrences of each tag. So an output like this:
{
"area": "X",
"count": 100, // Total entries with X as an area
"tagNameCount": {
"Financial": 20,
"Consumer": 10,
...
},
"tagTypeCount": {
"A": 70,,
"B: 40
}
}
I've been starting of using $unwind on areas, but it's the next steps from there I'm stuck on. I get that I need to use $group, but I can't work out how to count occurrences.
You may use $facet operator which allows perform several aggregation in one.
Walkthrough
1. We $unwind by area and tags
2. With $facet, we perform 3 parallel aggregations:
2.1 We count unique areas
2.2 We count unique tag names for each area
2.3 We count unique tag type for each area
3. We join 2 parallel arrays by flatten areas
4. We assemble desired output
db.collection.aggregate([
{
$unwind: "$areas"
},
{
$unwind: "$tags"
},
{
$facet: {
areas: [
{
$group: {
_id: "$areas",
count: {
$addToSet: "$_id"
}
}
},
{
$project: {
_id: 0,
area: "$_id",
count: {
$size: "$count"
}
}
}
],
tagNameCount: [
{
$group: {
_id: {
name: "$tags.name",
areas: "$areas"
},
count: {
$addToSet: "$_id"
}
}
},
{
$group: {
_id: "$_id.areas",
tagNameCount: {
$push: {
k: "$_id.name",
v: {
$size: "$count"
}
}
}
}
},
{
$addFields: {
tagNameCount: {
$arrayToObject: "$tagNameCount"
}
}
}
],
tagTypeCount: [
{
$group: {
_id: {
type: "$tags.type",
areas: "$areas"
},
count: {
$addToSet: "$_id"
}
}
},
{
$group: {
_id: "$_id.areas",
tagTypeCount: {
$push: {
k: "$_id.type",
v: {
$size: "$count"
}
}
}
}
},
{
$addFields: {
tagTypeCount: {
$arrayToObject: "$tagTypeCount"
}
}
}
]
}
},
{
$unwind: "$areas"
},
{
$addFields: {
"tagNameCount": {
$filter: {
input: "$tagNameCount",
cond: {
$eq: [
"$areas.area",
"$$this._id"
]
}
}
},
"tagTypeCount": {
$filter: {
input: "$tagTypeCount",
cond: {
$eq: [
"$areas.area",
"$$this._id"
]
}
}
}
}
},
{
$project: {
area: "$areas.area",
count: "$areas.count",
tagNameCount: {
$arrayElemAt: [
"$tagNameCount.tagNameCount",
0
]
},
tagTypeCount: {
$arrayElemAt: [
"$tagTypeCount.tagTypeCount",
0
]
}
}
},
{
$sort: {
area: 1
}
}
])
MongoPlayground
Here's one method:
unwind both areas and tags
for each area collect the applicable tags, and the unique names and types
count the names to get the total number of tags
for each unique name, count the matching values in the tags
do the same for each unique type
project out the unique fields
db.collection.aggregate([
{$unwind: "$areas"},
{$unwind: "$tags"},
{$group: {
_id: "$areas",
names: {$push: "$tags.name"},
uniqueNames: {$addToSet: "$tags.name"},
types: {$push: "$tags.type"},
uniqueTypes: {$addToSet: "$tags.type"}
}},
{$addFields: {
count: {$size: "$names"},
names: {
$arrayToObject: {
$map: {
input: "$uniqueNames",
as: "needle",
in: {
k: "$$needle",
v: {
$size: {
$filter: {
input: "$names",
cond: {$eq: ["$$this","$$needle"]}
}}}}}}},
types: {
$arrayToObject: {
$map: {
input: "$uniqueTypes",
as: "needle",
in: {
k: "$$needle",
v: {$size: {
$filter: {
input: "$types",
cond: { $eq: [ "$$this","$$needle"]}
}}}}}}}}},
{
$project: {
uniqueNames: 0,
uniqueTypes: 0
}}
])
Playground
I have following structure in users collection:
[
{ "name": "Ivan",
"payments": [
{"date": new Date("2019-01-01"), "details": [{"payment_system": "A", "spent": 95},
{"payment_system": "B", "spent": 123}]},
{"date": new Date("2019-01-03"), "details": [{"payment_system": "A", "spent": 12},
{"payment_system": "B", "spent": 11}]}]},
{ "name": "Mark",
"payments": [
{"date": new Date("2019-01-01"), "details": [{"payment_system": "D", "spent": 456},
{"payment_system": "B", "spent": 123}]},
{"date": new Date("2019-01-02"), "details": [{"payment_system": "A", "spent": 98},
{"payment_system": "C", "spent": 4}]}]}
]
Is it any way to add a field to users who spent more than, lets say 100 during the specific date range in specific payment system?
I tried updateMany, but have no idea how to filter "details" array element based on payment_system field.
For payment_system IN ("A", "C"), date >= "2019-01-02", spent_total >= 100 update should return
[
{ "name": "Ivan", ...},
{ "name": "Mark", "filter_passed": true, ... }
]
This this one:
db.collection.aggregate([
{
$set: {
payments: {
$filter: {
input: "$payments",
cond: { $gte: ["$$this.date", new Date("2019-01-02")] }
}
}
}
},
{
$set: {
spent_total: {
$reduce: {
input: "$payments.details.spent",
initialValue: [],
in: { $concatArrays: ["$$value", "$$this"] }
}
}
}
},
{ $set: { spent_total: { $sum: "$spent_total" } } },
{ $match: { "spent_total": { $gte: 100 } } }
])
Mongo Playground
Update:
Filter by payment_system is a bit longer. You have to $unwind and $group:
db.collection.aggregate([
{
$set: {
payments: {
$filter: {
input: "$payments",
cond: { $gte: ["$$this.date", new Date("2019-01-02")] }
}
}
}
},
{ $unwind: "$payments" },
{
$set: {
"payments.details": {
$filter: {
input: "$payments.details",
cond: { $in: ["$$this.payment_system", ["A", "C"]] }
},
},
}
},
{
$group: {
_id: { _id: "$_id", name: "$name", },
payments: { $push: "$payments" }
}
},
{
$set: {
spent_total: {
$reduce: {
input: "$payments.details.spent",
initialValue: [],
in: { $concatArrays: ["$$value", "$$this"] }
}
}
}
},
{ $set: { spent_total: { $sum: "$spent_total" } } },
{ $match: { "spent_total": { $gte: 100 } } },
{ // just some cosmetic
$project: {
_id: "$_id._id",
name: "$_id.name",
payments: 1
}
}
])
You cannot update your collection like db.collection.updateMany({}, [<the aggregation pipeline from above>]) because it contains $unwind and $group.
However, you can make $lookup or $out to save entire result into new collection.
If you need to sum up for each payment_system individually then try:
db.collection.aggregate([
{
$set: {
payments: {
$filter: {
input: "$payments",
cond: { $gte: ["$$this.date", new Date("2019-01-01")] }
}
}
}
},
{ $unwind: "$payments" },
{
$set: {
"payments.details": {
$filter: {
input: "$payments.details",
cond: { $in: ["$$this.payment_system", ["A", "B","C"]] }
},
},
}
},
{ $unwind: "$payments.details" },
{
$group: {
_id: {
_id: "$_id",
name: "$name",
payments: "$payments.details.payment_system"
},
spent_total: { $sum: "$payments.details.spent" }
}
},
{ $match: { "spent_total": { $gte: 100 } } },
{
$project: {
_id: "$_id._id",
name: "$_id.name",
payments: "$_id.payments",
spent_total: 1
}
}
])
Given documents such as
{
_id: 'abcd',
userId: '12345',
activities: [
{ status: 'login', timestamp: '10000001' },
{ status: 'logout', timestamp: '10000002' },
{ status: 'login', timestamp: '10000003' },
{ status: 'logout', timestamp: '10000004' },
]
}
I am trying to create a pipeline such as all users that have their latest login/logout activities recorded between two timestamps will be returned. For example, if the two timestamp values are between 10000002 and 10000003, the expected document should be
{
_id: 'abcd',
userId: '12345',
login: '10000003',
logout: '10000002'
}
Of if the two timestamp values are between -1 and 10000001, the expected document should be :
{
_id: 'abcd',
userId: '12345',
login: '10000001',
logout: null
}
Etc.
I know it has to do with aggregations, and I need to $unwind, etc., but I'm not sure about the rest, namely evaluating two fields from the same document array
You can try below aggregation:
db.col.aggregate([
{
$unwind: "$activities"
},
{
$match: {
$and: [
{ "activities.timestamp": { $gte: "10000001" } },
{ "activities.timestamp": { $lte: "10000002" } }
]
}
},
{
$sort: {
"activities.timestamp": -1
}
},
{
$group: {
_id: "$_id",
userId: { $first: "$userId" },
activities: { $push: "$activities" }
}
},
{
$addFields: {
login: { $arrayElemAt: [ { $filter: { input: "$activities", as: "a", cond: { $eq: [ "$$a.status", "login" ] } } } , 0 ] },
logout: { $arrayElemAt: [ { $filter: { input: "$activities", as: "a", cond: { $eq: [ "$$a.status", "logout" ] } } } , 0 ] }
}
},
{
$project: {
_id: 1,
userId: 1,
login: { $ifNull: [ "$login.timestamp", null ] },
logout: { $ifNull: [ "$logout.timestamp", null ] }
}
}
])
We need to use $unwind + $sort + $group to make sure that our activities will be sorted by timestamp. After $unwind you can use $match to apply filtering condition. Then you can use $filter with $arrayElemAt to get first (latest) value of filtered array. In the last $project you can explicitly use $ifNull (otherwise JSON key will be skipped if there's no value)
You can use below aggregation
Instead of $unwind use $lte and $gte with the $fitler aggregation.
db.collection.aggregate([
{ "$project": {
"userId": 1,
"login": {
"$max": {
"$filter": {
"input": "$activities",
"cond": {
"$and": [
{ "$gte": ["$$this.timestamp", "10000001"] },
{ "$lte": ["$$this.timestamp", "10000004"] },
{ "$lte": ["$$this.status", "login"] }
]
}
}
}
},
"logout": {
"$max": {
"$filter": {
"input": "$activities",
"cond": {
"$and": [
{ "$gte": ["$$this.timestamp", "10000001"] },
{ "$lte": ["$$this.timestamp", "10000004"] },
{ "$lte": ["$$this.status", "logout"] }
]
}
}
}
}
}}
])