MongoDB Aggregation : Group on common field of two arrays - mongodb

Below is a sample document:
{
'uid': 1,
'sent': [
{
'mid': 100,
'date': 20171210,
},
{
'mid': 101,
'date': 20171210,
}
],
'open': [
{
'mid': 100,
'date': 20171220,
},
{
'mid': 101,
'date': 20171220,
}
]
}
I want to group on 'uid' and nested 'mid' fields.
My desired output is :
{
'uid': 1,
'mid': 100,
'sent': [ 20171210 ],
'open': [ 20171220 ]
}
{
'uid': 1,
'mid': 101,
'sent': [ 20171210 ],
'open': [ 20171220 ]
}
Is there any efficient way of Aggregation which can give me above result?

You can $unwind one array, then use $filter to keep only the matching entries in the second array. Then $unwind the second array and $group.
// Groups the sent and open dates of every (uid, mid) pair.

// One document per entry of `sent`.
const unwindSent = { $unwind: '$sent' };

// Keep only the `open` entries whose mid equals the current sent entry's mid.
const keepMatchingOpens = {
  $project: {
    uid: 1,
    sent: 1,
    open: {
      $filter: {
        input: '$open',
        as: 'this',
        cond: { $eq: ['$sent.mid', '$$this.mid'] },
      },
    },
  },
};

// One document per remaining entry of `open`.
const unwindOpen = { $unwind: '$open' };

// Collect the dates of both sides under the composite (uid, mid) key.
const groupByUidAndMid = {
  $group: {
    _id: { uid: '$uid', mid: '$sent.mid' },
    sent: { $push: '$sent.date' },
    open: { $push: '$open.date' },
  },
};

// Lift the composite key back to top-level fields and drop _id.
const flattenKey = {
  $project: {
    _id: 0,
    uid: '$_id.uid',
    mid: '$_id.mid',
    sent: 1,
    open: 1,
  },
};

db.temp.aggregate([
  unwindSent,
  keepMatchingOpens,
  unwindOpen,
  groupByUidAndMid,
  flattenKey,
]);

Related

MongoDB: how to combine two different data structure to one

Here is my collection:
db.sites_people.find({person_id: ObjectId('62df5722d361708c1f0a7e9f')}, {user_groups: 1}):
[
{
'_id': { '$oid': '62df5769b6f6345d3e91130d' },
'user_groups': [
{
'id': { '$oid': '62df5769d361708c1f0a7ea1' },
},
],
},
{
'_id': { '$oid': '62df584eb6f6345d3e91138a' },
'user_groups': [
{ '$oid': '62df5769d361708c1f0a7ea1' },
],
},
];
As you can see, there are two different structures under user_groups.
I tried to use $ifNull to combine the user_groups, but it doesn't work.
// Attempt from the question: applies $ifNull to the whole user_groups array.
// The result printed right after this call shows an empty user_groups for the
// document whose entries are bare ObjectIds.
// NOTE(review): presumably '$user_groups.id' on an array resolves to a
// (possibly empty) array rather than null, so $ifNull never falls back — confirm.
db.sites_people.aggregate([
{ $match: { person_id: ObjectId('62df5722d361708c1f0a7e9f') } },
{ $project: { user_groups: { $ifNull: ['$user_groups.id', '$user_groups'] } } },
]);
[
{
'_id': { '$oid': '62df5769b6f6345d3e91130d' },
'user_groups': [
{ '$oid': '62df5769d361708c1f0a7ea1' },
],
},
{
'_id': { '$oid': '62df584eb6f6345d3e91138a' },
'user_groups': [],
},
];
How can I get the result like this:
[
{
'_id': { '$oid': '62df5769b6f6345d3e91130d' },
'user_groups': [
{ '$oid': '62df5769d361708c1f0a7ea1' },
],
},
{
'_id': { '$oid': '62df584eb6f6345d3e91138a' },
'user_groups': [
{ '$oid': '62df5769d361708c1f0a7ea1' },
],
},
];
The best way is to use $map to convert the arrays; this also gives you a unified structure for mixed-type arrays (one element with id and one without), like so:
// Per element: use its `id` when present, otherwise the element itself,
// so both user_groups layouts end up as a flat list of ids.
const idOrElement = { $ifNull: ['$$this.id', '$$this'] };

db.sites_people.aggregate([
  { $match: { person_id: ObjectId('62df5722d361708c1f0a7e9f') } },
  {
    $project: {
      user_groups: { $map: { input: '$user_groups', in: idOrElement } },
    },
  },
]);
Mongo Playground

How can i aggregate filter nested documents and get value from other field

I have a collection like this:
{
'_id' : ObjectId('6251f8556e75125f9260f333'),
'name': 'jojo',
'profile': 'jojo profile',
'date': ISODate("2022-04-09T21:18:40.473Z"),
'look': [
{ 'art': 'group-id', 'data': 'alma', 'dt': '1'},
{ 'art': 'called', 'data': 'central', 'dt': '1'},
{ 'art': 'access-time', 'data': 108000, 'dt': '1'}
],
'answer': [
{ 'art': 'rate-id', 'data': 'limit1', 'dt': '1'},
{ 'art': 'protocol', 'data': 'tcp', 'dt': '1'}
]
},
{
'_id' : ObjectId('6251f8306e75125f9260f332'),
'name': 'dodo',
'profile': 'dodo profile',
'date': ISODate("2022-04-09T15:20:58.562Z"),
'look': [
{ 'art': 'group-id', 'data': 'alma', 'dt': '1'},
{ 'art': 'called', 'data': 'central', 'dt': '1'},
],
'answer': [
{ 'art': 'rate-id', 'data': 'limit1', 'dt': '1'},
]
},
{
'_id' : ObjectId('6251a5113700ba4a0a59c48f'),
'name': 'kaka',
'profile': 'kaka profile',
'date': ISODate("2022-04-09T15:22:25.816Z"),
'look': [
{ 'art': 'access-time', 'data': 50400, 'dt': '1'}
],
'answer': [
{ 'art': 'protocol', 'data': 'tcp', 'dt': '1'}
]
}
and I was expecting an output like this:
{
'_id' : ObjectId('6251f8556e75125f9260f333'),
'name': 'jojo',
'profile': 'jojo profile',
'date': ISODate("2022-04-09T21:18:40.473Z"),
'goup': 'alma', // filter by 'group-id' and put value of data field
'called': 'central', // filter by 'called' and put value of data field
'accessTime': 108000, // filter by 'access-time' and put value of data field
'rate': 'limit1', // filter by 'rate-id' and put value of data field
'protocol': 'tcp', // filter by 'protocol' and put value of data field
},
{
'_id' : ObjectId('6251f8306e75125f9260f332'),
'name': 'dodo',
'profile': 'dodo profile',
'date': ISODate("2022-04-09T15:20:58.562Z"),
'goup': 'alma',
'called': 'central',
'accessTime': '', // set blank data if not exist
'rate': 'limit1',
'protocol': '', // set blank data if not exist
},
{
'_id' : ObjectId('6251a5113700ba4a0a59c48f'),
'name': 'kaka',
'profile': 'kaka profile',
'date': ISODate("2022-04-09T15:22:25.816Z"),
'goup': '', // set blank data if not exist
'called': '', // set blank data if not exist
'accessTime': 50400,
'rate': '', // set blank data if not exist
'protocol': 'tcp',
}
I've searched here but couldn't find an answer that matches the problem I'm facing, probably because of the wrong keywords.
Since I'm new to mongodb, I'm confused about how to solve the query I want. How can I achieve this? Please help me...
You would require an aggregate operation that has a pipeline with the following key operators and stages:
$map: an operator to transform the look and answer arrays into documents with just mapped k and v fields, crucial for obtaining a hash map with the following operator
$arrayToObject: this allows the above to be possible i.e. converting an array into a single document
$mergeObjects: combine top level fields i.e. _id, date, name, profile together with the converted documents above
$replaceWith: pipeline stage to replace the root document with the specified document from above
Overall, your pipeline should follow:
// Part of an entry's `art` value before the first '-'.
const first = { $first: { $split: ['$$this.art', '-'] } };

// Output key for an entry: the prefix above, except that 'access' is
// renamed to 'accessTime'.
const keyExpression = {
  $cond: [{ $eq: [first, 'access'] }, 'accessTime', first],
};

// Turns the array at `arrayPath` into a single document of key -> data pairs.
const entriesToDocument = (arrayPath) => ({
  $arrayToObject: {
    $map: {
      input: arrayPath,
      in: { k: keyExpression, v: '$$this.data' },
    },
  },
});

// Top-level fields to keep, plus blank defaults for every derived field.
const baseDocument = {
  _id: '$_id',
  date: '$date',
  name: '$name',
  profile: '$profile',
  protocol: '',
  group: '',
  called: '',
  rate: '',
  accessTime: '',
};

// Replace each document with the base fields merged with the documents
// derived from `look` and `answer` (later entries override the defaults).
const pipeline = [
  {
    $replaceWith: {
      $mergeObjects: [
        baseDocument,
        entriesToDocument('$look'),
        entriesToDocument('$answer'),
      ],
    },
  },
];
Mongo Playground
For this you should use MongoDB's aggregation framework, because getting the data into the shape you want requires complex operations.
https://www.mongodb.com/docs/manual/aggregation/
Every aggregation is an array of stages and every stage does something specific.
I used the next stages:
addFields: Allows you to add new fields to the response of every document, so if you don't have group in the document, that will add or replace it.
project: Allows you remove some fields of a document. In a projection stage if you set an attribute as 0 that will remove that attribute from the response.
Also I used some operators:
$filter: allows you to filter the elements of an array field by a condition
$arrayElemAt: receives an array and returns the element at the specified position
The pipeline:
// Pipeline that lifts selected `look` / `answer` entries to top-level fields.
[
  // Stage 1: for each output field, pick the first array entry whose `art`
  // matches the wanted value. Nothing usable is produced when no entry matches.
  {
    "$addFields": {
      "group": {
        "$arrayElemAt": [
          {
            "$filter": {
              "input": "$look",
              "as": "item",
              "cond": { "$eq": ["$$item.art", "group-id"] }
            }
          },
          0
        ]
      },
      "called": {
        "$arrayElemAt": [
          {
            "$filter": {
              "input": "$look",
              "as": "item",
              "cond": { "$eq": ["$$item.art", "called"] }
            }
          },
          0
        ]
      },
      "accessTime": {
        "$arrayElemAt": [
          {
            "$filter": {
              "input": "$look",
              "as": "item",
              "cond": { "$eq": ["$$item.art", "access-time"] }
            }
          },
          0
        ]
      },
      "rate": {
        "$arrayElemAt": [
          {
            "$filter": {
              "input": "$answer",
              "as": "item",
              "cond": { "$eq": ["$$item.art", "rate-id"] }
            }
          },
          0
        ]
      },
      "protocol": {
        "$arrayElemAt": [
          {
            "$filter": {
              "input": "$answer",
              "as": "item",
              "cond": { "$eq": ["$$item.art", "protocol"] }
            }
          },
          0
        ]
      }
    }
  },
  // Stage 2: replace each picked entry by its `data` value. $ifNull supplies
  // the blank string the question asks for when no matching entry was found;
  // without it the field would simply be absent from the output.
  {
    "$addFields": {
      "group": { "$ifNull": ["$group.data", ""] },
      "called": { "$ifNull": ["$called.data", ""] },
      "accessTime": { "$ifNull": ["$accessTime.data", ""] },
      "rate": { "$ifNull": ["$rate.data", ""] },
      "protocol": { "$ifNull": ["$protocol.data", ""] }
    }
  },
  // Stage 3: drop the source arrays from the result.
  {
    "$project": {
      "look": 0,
      "answer": 0
    }
  }
]
This is quite cumbersome with the current structure, as for each field you have to convert the object to an array, filter it then convert it back, here's how it looks:
/**
 * Builds the expression for one derived field: a sub-document
 * `{ [key]: <data> }` taken from the first entry of the array at `arrayPath`
 * whose `art` equals `art`.
 *
 * @param {string} arrayPath - field path of the source array, e.g. '$look'
 * @param {string} art - value of `art` that selects the entry
 * @param {string} key - name of the output field
 * @returns {object} aggregation expression to pass to $mergeObjects
 */
function dataFieldAs(arrayPath, art, key) {
  return {
    $arrayToObject: {
      $map: {
        input: {
          $filter: {
            input: {
              $objectToArray: {
                $ifNull: [
                  {
                    $arrayElemAt: [
                      {
                        $filter: {
                          // A missing source array is treated as empty.
                          input: { $ifNull: [arrayPath, []] },
                          cond: { $eq: ['$$this.art', art] },
                        },
                      },
                      0,
                    ],
                  },
                  // Fallback when no entry matches. It has to carry a `data`
                  // key: the filter below keeps only `data`, so any other key
                  // would be discarded and the output field would be missing
                  // instead of blank.
                  { data: '' },
                ],
              },
            },
            // Of the selected entry's key/value pairs keep only `data`.
            cond: { $eq: ['$$this.k', 'data'] },
          },
        },
        // Rename the `data` pair to the requested output key.
        in: { k: key, v: '$$this.v' },
      },
    },
  };
}

// Replace every document by its top-level fields merged with the five
// derived fields ('goup' is spelled as in the requested output).
db.collection.aggregate([
  {
    $replaceRoot: {
      newRoot: {
        $mergeObjects: [
          {
            _id: '$_id',
            name: '$name',
            profile: '$profile',
            date: '$date',
          },
          dataFieldAs('$look', 'group-id', 'goup'),
          dataFieldAs('$look', 'called', 'called'),
          dataFieldAs('$look', 'access-time', 'accessTime'),
          dataFieldAs('$answer', 'rate-id', 'rate'),
          dataFieldAs('$answer', 'protocol', 'protocol'),
        ],
      },
    },
  },
]);
Mongo Playground
If you're using Mongo version 5+, then you can use $getField to simplify the syntax a little bit, here's how one field would look like in this syntax:
// Fragment: definition of the single output field `goup` using $getField.
goup: {
$getField: {
// Read the `data` field of the entry selected below.
field: 'data',
input: {
// First entry of `look` whose `art` is 'group-id'.
'$arrayElemAt': [
{
$filter: {
input: {
// Use an empty array when `look` is null or missing.
$ifNull: [
'$look',
[],
],
},
cond: {
$eq: [
'$$this.art',
'group-id',
],
},
},
},
0,
],
},
},
},

keep latest record based on multiple grouping

I wrote a multi-stage pipeline to arrive at this set of documents:
{'_id': '1234',
'info': [{'type': 'patient',
'patient_id': 'p1'},
{'type': 'doc',
'doc_id': 'd1'},
{'type': 'ldlc',
'dt': datetime.datetime(2018, 10, 29, 12, 7, 23),
'val': 136},
{'type': 'bp',
'dt': datetime.datetime(2014, 8, 25, 4, 2, 27),
'val': [{'dias': 74}, {'sys': 105}]}]},
{'_id': '1235',
'info': [{'type': 'patient',
'patient_id': 'p2'},
{'type': 'doc',
'doc_id': 'd1'},
{'type': 'ldlc',
'dt': datetime.datetime(2016, 3, 31, 21, 30, 34),
'val': 153},
{'type': 'bp',
'dt': datetime.datetime(2013, 7, 3, 18, 3, 12),
'val': [{'dias': 86}, {'sys': 101}]},
{'type': 'bp',
'dt': datetime.datetime(2016, 3, 15, 18, 35, 25),
'val': [{'dias': 85}, {'sys': 108}]},
{'type': 'ldlc',
'dt': datetime.datetime(2018, 10, 1, 12, 7, 23),
'val': 144}]}
I am using pymongo, hence the datetime objects.
Now in each document I only want to keep the last recorded values (sort by dt) for 'ldlc' and 'bp'.
I would prefer it to be as:
{
"_id": '1234',
"patient_id": "p1",
"doc_id": "d1",
"sys": 105,
"dias": 74,
"ldlc": 136
},
{
"_id": '1235',
"patient_id": "p2",
"doc_id": "d1",
"sys": 108,
"dias": 85,
"ldlc": 144
}
Since the source documents are generated in an aggregation pipeline, I want to add $project and $group stages after that in order to produce the desired result.
Thanks for your help!
There are different approaches to achieve this use case.
I started with $sort to sort based on dates, and then used $facet for parallel grouping. Since you need to keep only the latest record, $last is used to get the required values.
Your aggregation can look like below:
// Flattens each document's `info` array into one record holding patient_id,
// doc_id and the values of the last 'ldlc' and 'bp' entries in `dt` order.
db.collection.aggregate([
// One document per entry of `info`.
{
$unwind: "$info"
},
// Ascending by date, so that $last below picks the most recent entry.
// NOTE(review): relies on the sort order being preserved into the $facet
// sub-pipelines' $group stages — confirm.
{
$sort: {
"info.dt": 1
}
},
// Three sub-pipelines over the same input, one per kind of entry.
{
"$facet": {
// Last 'ldlc' value per original document.
"ldlc": [
{
"$match": {
"info.type": "ldlc"
}
},
{
"$group": {
"_id": "$_id",
"ldlc": {
$last: "$info.val"
}
}
}
],
// Last 'bp' value per original document. In the sample data that value is
// an array of single-key objects, so the $unwind yields one document each.
"bp": [
{
"$match": {
"info.type": "bp"
}
},
{
"$group": {
"_id": "$_id",
"bp": {
$last: "$info.val"
}
}
},
{
$unwind: "$bp"
}
],
// patient_id / doc_id taken from the 'patient' and 'doc' entries.
"others": [
{
$match: {
$or: [
{
"info.type": "patient"
},
{
"info.type": "doc"
}
]
}
},
{
"$group": {
"_id": "$_id",
"ids": {
$push: {
p: "$info.patient_id",
d: "$info.doc_id"
}
}
}
},
{
$unwind: "$ids"
}
],
}
},
// Put the three facet results into a single array ...
{
$project: {
data: {
$concatArrays: [
"$others",
"$ldlc",
"$bp"
]
}
}
},
// ... and turn it back into a stream of documents.
{
$unwind: "$data"
},
// Regroup per original _id; every pushed object carries only the fields
// its source facet provided.
{
"$group": {
"_id": "$data._id",
"val": {
$push: {
patient_id: "$data.ids.p",
doc_id: "$data.ids.d",
ldlc: "$data.ldlc",
dias: "$data.bp.dias",
sys: "$data.bp.sys"
}
}
}
},
// Merge the partial objects of each group into one object `v`.
{
"$project": {
_id: 1,
"v": {
"$reduce": {
"input": "$val",
"initialValue": {},
"in": {
"$mergeObjects": [
"$$value",
"$$this"
]
}
}
}
}
},
// Lift the merged fields to the top level.
{
"$project": {
_id: 1,
patient_id: "$v.patient_id",
doc_id: "$v.doc_id",
ldlc: "$v.ldlc",
dias: "$v.dias",
sys: "$v.sys"
}
}
])
Check out the query result here: Mongo Playground
PS: This may not be the best approach

MongoDB aggregating multiple arrays of objects based on shared key

I'm writing a query to calculate multiple metrics for each user in my DB.
I've calculated all of the metrics, and have a structure like this
{
"metric1": [{"user_id": 1, "val": 13},{"user_id": 2, "val": 100}],
"metric2": [{"user_id": 2, "val": 29},{"user_id": 1, "val": 123}],
"metric3": [{"user_id": 1, "val": 46},{"user_id": 2, "val": 111}]
}
I'm trying to convert the above into this structure
{
"user_id": [1,2],
"metric1": [13, 100],
"metric2": [29,123],
"metric3": [46,111]
}
So that I can display a table showing each user and the three metrics (one metric per column, and one user per row).
Considering that your data is as you've described:
{
"metric1": [
{"id1": 1}, {"id2": 2}
],
"metric2": [
{"id2": 22}, {"id1": 11}
],
"metric3": [
{"id2": 222}, {"id1": 111}
]
}
All you have to do is use $unwind to break up the arrays and then $objectToArray to get access to the keys:
// Collects the keys and values of the metric1..metric3 entries into four
// top-level arrays (user_id, metric1, metric2, metric3).
db.blah.aggregate([
// One document per combination of metric1 / metric2 / metric3 entries.
{ $unwind: '$metric1' },
{ $unwind: '$metric2' },
{ $unwind: '$metric3' },
// Turn every entry into an array of { k, v } pairs to expose its key.
{ $project: {'metric1': { $objectToArray: '$metric1' }, 'metric2': { $objectToArray: '$metric2' }, 'metric3': { $objectToArray: '$metric3' }} },
// NOTE(review): three consecutive $sort stages; presumably only the last
// ordering is effective — confirm whether a single compound sort was meant.
{ $sort: { 'metric1.k' : -1} },
{ $sort: { 'metric2.k' : -1} },
{ $sort: { 'metric3.k' : -1} },
// Unwrap the { k, v } arrays produced by $objectToArray.
{ $unwind: '$metric1' },
{ $unwind: '$metric2' },
{ $unwind: '$metric3' },
// Single group over everything; $addToSet keeps each distinct value once.
// NOTE(review): equal metric values of different users would collapse into
// one element, and the element order of $addToSet results may not be
// guaranteed, so the four arrays are not necessarily aligned per user — verify.
{ $group: {
_id: null,
user_id: { $addToSet: '$metric1.k' },
metric1: { $addToSet: '$metric1.v' },
metric2: { $addToSet: '$metric2.v' },
metric3: { $addToSet: '$metric3.v' },
} },
// Drop the constant group key.
{ $project: { _id: 0 } }
]).pretty()
which results
{
"user_id" : [
"id1",
"id2"
],
"metric1" : [
1,
2
],
"metric2" : [
11,
22
],
"metric3" : [
111,
222
]
}

How do I flatten the results of an aggregation?

I have the following query...
// Counts apprenticeships per (vacancy, status) for vacancies 1, 2 and 3.

// Only documents belonging to one of the three vacancies.
const matchVacancies = {
  $match: { 'Vacancy._id': { $in: [1, 2, 3] } },
};

// One group per vacancy/status pair, counting its documents.
const countByVacancyAndStatus = {
  $group: {
    _id: { VacancyId: '$Vacancy._id', Status: '$Status' },
    Count: { $sum: 1 },
  },
};

// Order by vacancy first, then by status.
const sortByKey = {
  $sort: { '_id.VacancyId': 1, '_id.Status': 1 },
};

db.getCollection('apprenticeships').aggregate([
  matchVacancies,
  countByVacancyAndStatus,
  sortByKey,
]);
Which gives results where each element has following structure
{
"_id" : {
"VacancyId" : 1,
"Status" : 90
},
"Count" : 40.0
}
How can I remap that structure so that the elements in the output look like this instead?
{
"VacancyId": 1,
"Status": 90,
"Count": 40
}
You can add a $project stage to the aggregation pipeline that adds the new fields VacancyId and Status and hides _id:
// Counts apprenticeships per (vacancy, status) and returns flat documents
// of the form { VacancyId, Status, Count }.
db.getCollection('apprenticeships')
  .aggregate([
    // Only documents belonging to one of the three vacancies.
    {
      $match: {
        'Vacancy._id': {
          $in: [1, 2, 3],
        },
      },
    },
    // One group per vacancy/status pair, counting its documents.
    {
      $group: {
        '_id': {
          'VacancyId': '$Vacancy._id',
          'Status': '$Status',
        },
        'Count': {
          $sum: 1,
        },
      },
    },
    {
      $sort: {
        '_id.VacancyId': 1,
        '_id.Status': 1,
      },
    },
    // Lift the parts of the group key to top-level fields and hide _id.
    // The stage must be a direct element of the pipeline array; wrapping it
    // in a second pair of braces is a syntax error.
    {
      $project: {
        'VacancyId': '$_id.VacancyId',
        'Status': '$_id.Status',
        'Count': '$Count',
        '_id': 0,
      },
    },
  ]);