Aggregate and project with multiples conditions

Aggregate and project with multiples conditions - mongodb

I have a collection myCollection with array of members :
{
name : String,
members: [{status : Number, memberId : {type: Schema.Types.ObjectId, ref: 'members'}]
}
and i have this data
"_id" : ObjectId("5e83791eb49ab07a48e0282b")
"members" : [
{
"status" : 1,
"_id" : ObjectId("5e83791eb49ab07a48e0282c"),
"memberId" : ObjectId("5e7dbf5b257e6b18a62f2da9")
},
{
"status" : 2,
"_id" : ObjectId("5e837944b49ab07a48e0282d"),
"memberId" : ObjectId("5e7de2dbe027f43adf678db8")
}
],
I want to check by aggregate query if member 5e7dbf5b257e6b18a62f2da9 exists with status 1 but it didn't return true
db.getCollection('myCollection').aggregate([
{$match: {_id: ObjectId("5e83791eb49ab07a48e0282b")}},
{
$project: {
isMember: {
$cond: [
{ $and: [ {$in: [ObjectId("5e7dbf5b257e6b18a62f2da9"), '$members.memberId']}, {$eq: ['$members.status', 1]} ] },
// if
true, // then
false // else
]
}
}
}
])
Thank you for your responses.

If you want to get just true/false you can shortcut like this:
db.collection.aggregate([
{ $match: { _id: ObjectId("5e83791eb49ab07a48e0282b") } },
{
$project: {
isMember: {
$map: {
input: "$members",
in: {
$and: [
{ $eq: [ObjectId("5e7dbf5b257e6b18a62f2da9"), '$$this.memberId'] },
{ $eq: [1, '$$this.status'] }
]
}
}
}
}
},
{ $set: { isMember: { $anyElementTrue: "$isMember" } } }
])
A different style would be this:
db.collection.aggregate([
{ $match: { _id: ObjectId("5e83791eb49ab07a48e0282b") } },
{
$project: {
isMember: {
$map: {
input: "$members",
in: {
$eq: [
{ memberId: ("5e7dbf5b257e6b18a62f2da9"), status: 1 },
{ memberId: "$$this.memberId", status: "$$this.status" }
]
}
}
}
}
},
{ $set: { isMember: { $anyElementTrue: "$isMember" } } }
])

Related

MongoDB Query based on value in array and return a single field

I have a collection called countries:
{
"_id" : "123456",
"enabled" : true,
"entity" : {
"name" : [
{
"locale" : "en",
"value" : "Lithuania"
},
{
"locale" : "de",
"value" : "Litauen"
}
]
}
}
I like to return only the ObjectId and the value when the locale is "en".
{"_id":"123456", "value":"Lithuania"}
Ideally renaming value to country for:
{"_id":"123456", "country":"Lithuania"}
Using a projection like:
db.countries.aggregate([
{$project:
{country: {$arrayElemAt:["$entity.name",0]}}
}
])
returns almost the desired results:
{"_id" : "1234565", "country" : { "locale" : "en", "value" : "Lithuania" } }

This this one:
db.collection.aggregate([
{
$set: {
country: {
$filter: {
input: "$entity.name",
cond: { $eq: [ "$this.locale", "en" ] }
}
}
}
},
{ $project: { country: { $first: "$country.value" } } },
])
See Mongo playground

You can try,
$reduce to iterate loop of entity.name array, $cond check locale is "en" then return value
db.collection.aggregate([
{
$project: {
country: {
$reduce: {
input: "$entity.name",
initialValue: "",
in: {
$cond: [
{ $eq: ["$$this.locale", "en"] },
"$$this.value",
"$$value"
]
}
}
}
}
}
])
Playground

I should have specified the MongoDB Version. Server is running 3.6.20. For that the following solution works:
db.countries.aggregate([
{
$addFields: {
country: {
$filter: {
input: "$entity.name",
cond: { $eq: [ "$$this.locale", "en" ] }
}
}
}
},
{ $project: { country: { $arrayElemAt: [["$country.value"],0] } } },
])

Mongodb: $unwind and compute $avg

I have documents storing IoT data.
Following MongoDB schema design best practices for IoT, I came to documents with the following structure:
"_id" : "AQ106_2020-09-12T09",
"date" : "2020-09-12T09:00:00.000Z",
"station" : {
"name" : "AQ106",
"loc" : {
"type" : "Point",
"coordinates" : [
14.339263,
40.814224
]
},
"properties" : {
}
},
"samples" : [
{
"t" : ISODate("2020-09-12T11:02:00.000+02:00"),
"data" : {
"pm1_mg_m3" : 2.7,
"pm2_5_mg_m3" : 4.6,
"pm10_mg_m3" : 12,
"P0" : 152,
"P1" : 16,
"P2" : 4.7,
"P3" : 0.8,
"P4" : 0.86,
"P5" : 0.6,
"P6" : 0.28,
"P7" : 0.152,
"P8" : 0.094,
"P9" : 0.092,
"P10" : 0.019,
"P11" : 0,
"P12" : 0,
"P13" : 0.0188,
"P14" : 0,
"P15" : 0,
"P16" : 0,
"P17" : 0,
"P18" : 0,
"P19" : 0,
"P20" : 0,
"P21" : 0,
"P22" : 0,
"P23" : 0,
"temp_celsius" : 32.59,
"humRelPercent" : 34,
"press_mBar" : 1010.79,
"CO2mA" : 4,
"NO2_WE_mV" : 226.419,
"NO2_AE_mV" : 229.553,
"OX_WE_mV" : 252.287,
"OX_AE_mV" : 220.419,
"CO_WE_mV" : 509.077,
"AE_WE_mV" : 348.51,
"batt_V" : 13.5,
"source_V" : 17.6
}
},
.... additional arrays
}
Now I want to compute hourly or daily averages (or another metric) to populate a new collection with only summarised data.
I coded the following solution for hourly means:
db.collection.aggregate([{$match: {
'station.name':'AQ104'
}}, {$unwind: {
path: "$samples"
}}, {$group: {
_id: "$date",
P0: {
$avg : "$samples.data.P0"
},
temp:{
$avg:"$samples.data.temp_celsius"
}
}}])
This works but I need to manually create a field for each property in samples.data in the original document and that's tedious.
Moreover, how to group both by date and station.name?
You can find a working example here.
Thanks.

Let's start with the easy question, how to group on multiple fields? With a simple syntax change:
{
$group: {
_id: {
date: "$date",
station: "$station.name"
}
}
Now for the second question this will be a bit more tedious. Mongo does not support "merging" objects by their keys with custom logic (in this case $avg). So we will have to convert the object to an array. unwind it, calculate the average per field and eventually group to restore the required structure like so:
db.collection.aggregate([
{
$match: {
"station.name": "AQ106"
}
},
{
$unwind: {
path: "$samples"
}
},
{
$addFields: {
objArr: {
"$objectToArray": "$samples.data"
}
}
},
{
$unwind: "$objArr"
},
{
$group: {
_id: {
date: "$date",
station: "$station.name",
objKey: "$objArr.k"
},
value: {
$avg: "$objArr.v"
}
}
},
{
$addFields: {
data: {
"$arrayToObject": [
[
{
k: "$_id.objKey",
v: "$value"
}
]
]
}
}
},
{
$group: {
_id: {
date: "$_id.date",
station: "$_id.station"
},
data: {
"$mergeObjects": "$data"
}
}
},
{
$replaceRoot: {
newRoot: {
"$mergeObjects": [
"$data",
"$_id"
]
}
}
}
])
MongoPlayground
------- EDIT ---------
For Mongo v4.4+ you can use $accumulator which allows you to execute custom javascript code in your pipeline. I am unsure how this will fare against the native Mongo pipeline in terms of performance in scale.
One thing to note is that I added the initial $addFields stage under the assumption that different samples may have different keys. if this is not the case it is not needed.
db.collection.aggregate([
{
$addFields: {
sampleKeys: {
$reduce: {
input: {
$map: {
input: "$samples",
as: "sample",
in: {
$map: {
input: {
"$objectToArray": "$$sample.data"
},
as: "sampleArrItem",
in: "$$sampleArrItem.k"
}
}
}
},
initialValue: [],
in: {
"$setUnion": [
"$$this",
"$$value"
]
}
}
}
}
},
{
$addFields: {
samples: {
$accumulator: {
init: function(keys){
return keys.map(k => {return {k: {v: 0, c: 0}}});
},
initArgs: ["$sampleKeys"],
accumulateArgs: ["$samples"],
accumulate: function(state, sample){
Object.keys(state).forEach((key) => {
if (key in sample.data) {
state[key].v += sample.data[key];
state[key].c++;
};
});
return state;
},
merge: function(state1, state2){
Object.keys(state1).forEach((key) => {
state1[key].v += state2[key].v;
state1[key].c += state2[key].c;
});
return state1;
},
lang: "js"
}
}
}
},
{
$replaceRoot: {
newRoot: {
$mergeObject: [
"$samples",
{station: "$station.name", date: "$date"},
]
}
}
}
])

I partially resolved my question in terms of grouping by multiple fields (MongoDB documentation was not so clear at this regard, in my opinion)
db.collection.aggregate([
{
$unwind: {
path: "$samples"
}
},
{
$group: {
_id: {
date: "$date",
station: "$station.name"
},
P0: {
$avg: "$samples.data.P0"
},
temp: {
$avg: "$samples.data.temp_celsius"
}
}
}
])
Here the updated working example.
Thanks to Tom Slabbaert, I solved my question with the following query:
db.collection.aggregate([
{
$unwind: {
path: "$samples"
}
},
{
$addFields: {
objArr: {
"$objectToArray": "$samples.data"
}
}
},
{
$unwind: "$objArr"
},
{
$group: {
_id: {
date: "$date",
station: "$station",
objKey: "$objArr.k"
},
value: {
$avg: "$objArr.v"
}
}
},
{
$addFields: {
data: {
"$arrayToObject": [
[
{
k: "$_id.objKey",
v: "$value"
}
]
]
}
}
},
{
$group: {
_id: {
date: "$_id.date",
station: "$_id.station"
},
data: {
"$mergeObjects": "$data"
}
}
},
{
"$project": {
_id: "$_id.date",
station: "$_id.station",
data: 1
}
}
])
Here
I wonder if it is possible to simplify the above solution using the new $function operator.
Thanks.

Count Both Outer and Inner embedded array in a single query

{
_id: ObjectId("5dbdacc28cffef0b94580dbd"),
"comments" : [
{
"_id" : ObjectId("5dbdacc78cffef0b94580dbf"),
"replies" : [
{
"_id" : ObjectId("5dbdacd78cffef0b94580dc0")
},
]
},
]
}
How to count the number of element in comments and sum with number of relies
My approach is do 2 query like this:
1. total elements of replies
db.posts.aggregate([
{$match: {_id:ObjectId("5dbdacc28cffef0b94580dbd")}},
{ $unwind: "$comments",},
{$project:{total:{$size:"$comments.replies"} , _id: 0} }
])
2. count total elements of comments
db.posts.aggregate([
{$match: {_id:ObjectId("5dbdacc28cffef0b94580dbd")}},
{$project:{total:{$size:"$comments.replies"} , _id: 0} }
])
Then sum up both, do we have any better solution to write the query like return the sum of of total element comments + replies

You can use $reduce and $concatArrays to "merge" an inner "array of arrays" into a single list and measure the $size of that. Then simply $add the two results together:
db.posts.aggregate([
{ "$match": { _id:ObjectId("5dbdacc28cffef0b94580dbd") } },
{ "$addFields": {
"totalBoth": {
"$add": [
{ "$size": "$comments" },
{ "$size": {
"$reduce": {
"input": "$comments.replies",
"initialValue": [],
"in": {
"$concatArrays": [ "$$value", "$$this" ]
}
}
}}
]
}
}}
])
Noting that an "array of arrays" is the effect of an expression like $comments.replies, so hence the operation to make these into a single array where you can measure all elements.

Try using the $unwind to flatten the list you get from the $project before using $count.

This is another way of getting the result.
Input documents:
{ "_id" : 1, "array1" : [ { "array2" : [ { id: "This is a test!"}, { id: "test1" } ] }, { "array2" : [ { id: "This is 2222!"}, { id: "test 222" }, { id: "222222" } ] } ] }
{ "_id" : 2, "array1" : [ { "array2" : [ { id: "aaaa" }, { id: "bbbb" } ] } ] }
The query:
db.arrsizes2.aggregate( [
{ $facet: {
array1Sizes: [
{ $project: { array1Size: { $size: "$array1" } } }
],
array2Sizes: [
{ $unwind: "$array1" },
{ $project: { array2Size: { $size: "$array1.array2" } } },
],
} },
{ $project: { result: { $concatArrays: [ "$array1Sizes", "$array2Sizes" ] } } },
{ $unwind: "$result" },
{ $group: { _id: "$result._id", total1: { $sum: "$result.array1Size" }, total2: { $sum: "$result.array2Size" } } },
{ $addFields: { total: { $add: [ "$total1", "$total2" ] } } },
] )
The output:
{ "_id" : 2, "total1" : 1, "total2" : 2, "total" : 3 }
{ "_id" : 1, "total1" : 2, "total2" : 5, "total" : 7 }

Query datevalue of a inner Array element

Need help with some MongoDB query:
The document I have is below and I am trying to search based on 2 conditions
The meta.tags.code = "ABC"
Its LastSyncDateTime should
meta.extension.value == "" (OR)
the meta.extension.value is less than meta.lastUpdated
Data :
{
"meta" : {
"extension" : [
{
"url" : "LastSyncDateTime",
"value" : "20190206-00:49:25.694"
},
{
"url" : "RetryCount",
"value" : "0"
}
],
"lastUpdate" : "20190207-01:21:41.095",
"tags" : [
{
"code" : "ABC",
"system" : "type"
},
{
"code" : "XYZ",
"system" : "SourceSystem"
}
]
}
}
Query:
db.proc_patients_service.find({
"meta.tags.code": "ABC",
$or: [{
"meta.extension.value": ""
}, {
$expr: { "$lt": [{ "mgfunc": "ISODate", "params": [{ "$arrayElemAt": ["$meta.extension.value", 0] }] }, { "mgfunc": "ISODate", "params": ["$meta.lastUpdate"] }] }
}]
})
But it is only fetching ABC Patients whose LastSyncDateTime is empty and ignores the other condition.

Using MongoDB Aggregation, I have converted your string to date with operator $dateFromString and then compare the value as per your criteria.
db.proc_patients_service.aggregate([
{ $match: { "meta.tags.code": "ABC", } },
{ $unwind: "$meta.extension" },
{
$project: {
'meta.tags': '$meta.tags',
'meta.lastUpdate': { '$dateFromString': { 'dateString': '$meta.lastUpdate', format: "%Y%m%d-%H:%M:%S.%L" } },
'meta.extension.url': '$meta.extension.url',
'meta.extension.value': {
$cond: {
if: { $ne: ["$meta.extension.value", "0"] }, then: { '$dateFromString': { 'dateString': '$meta.extension.value', format: "%Y%m%d-%H:%M:%S.%L" } }, else: 0
}
}
}
},
{
$match: {
$or: [
{ "meta.extension.value": 0 },
{ $expr: { $lt: ["$meta.extension.value", "$meta.lastUpdate"] } }
]
}
},
{
$group: { _id: '_id', 'extension': { $push: '$meta.extension' }, "lastUpdate": { $first: '$meta.lastUpdate' }, 'tags': { $first: '$meta.tags' } }
},
{
$project: { meta: { 'extension': '$extension', lastUpdate: '$lastUpdate', 'tags': '$tags' } }
}
])

Compare 2 count aggregations

I have a collection in MongoDB that looks something like the following:
{ "_id" : 1, "type" : "start", userid: "101", placementid: 1 }
{ "_id" : 2, "type" : "start", userid: "101", placementid: 2 }
{ "_id" : 3, "type" : "start", userid: "101", placementid: 3 }
{ "_id" : 4, "type" : "end", userid: "101", placementid: 1 }
{ "_id" : 5, "type" : "end", userid: "101", placementid: 2 }
and I want to group results by userid then placementid and then count the types of "start" and "end", but only when the two counts are different. In this particular example I would want to get placementid: 3 because when grouped and counted this is the only case where the counts don't match.
I've written a query that gets the 2 counts and the grouping but I can't do the filtering when counts don't match. This is my query:
db.getCollection('mycollection').aggregate([
{
$project: {
userid: 1,
placementid: 1,
isStart: {
$cond: [ { $eq: ["$type", "start"] }, 1, 0]
},
isEnd: {
$cond: [ { $eq: ["$type", "end"] }, 1, 0]
}
}
},
{
$group: {
_id: { userid:"$userid", placementid:"$placementid" },
countStart:{ $sum: "$isStart" },
countEnd: { $sum: "$isEnd" }
}
},
{
$match: {
countStart: {$ne: "$countEnd"}
}
}
])
It seems like I'm using the match aggregation incorrectly because I'm seeing results where countStart and countEnd are the same.
{ "_id" : {"userid" : "101", "placementid" : "1"}, "countStart" : 1.0, "countEnd" : 1.0 }
{ "_id" : {"userid" : "101", "placementid" : "2"}, "countStart" : 1.0, "countEnd" : 1.0 }
{ "_id" : {"userid" : "101", "placementid" : "3"}, "countStart" : 1.0, "countEnd" : 0 }
Can anybody point into the right direction please?

To compare two fields inside $match stage you need $expr which is available in MongoDB 3.6:
db.myCollection.aggregate([
{
$project: {
userid: 1,
placementid: 1,
isStart: {
$cond: [ { $eq: ["$type", "start"] }, 1, 0]
},
isEnd: {
$cond: [ { $eq: ["$type", "end"] }, 1, 0]
}
}
},
{
$group: {
_id: { userid:"$userid", placementid:"$placementid" },
countStart:{ $sum: "$isStart" },
countEnd: { $sum: "$isEnd" }
}
},
{
$match: {
$expr: { $ne: [ "$countStart", "$countEnd" ] }
}
}
])
If you're using older version of MongoDB you can use $redact:
db.myCollection.aggregate([
{
$project: {
userid: 1,
placementid: 1,
isStart: {
$cond: [ { $eq: ["$type", "start"] }, 1, 0]
},
isEnd: {
$cond: [ { $eq: ["$type", "end"] }, 1, 0]
}
}
},
{
$group: {
_id: { userid:"$userid", placementid:"$placementid" },
countStart:{ $sum: "$isStart" },
countEnd: { $sum: "$isEnd" }
}
},
{
$redact: {
$cond: { if: { $ne: [ "$countStart", "$countEnd" ] }, then: "$$KEEP", else: "$$PRUNE" }
}
}
])

You run do the following pipeline to get this - no need to use $expr or $redact or anything special really:
db.mycollection.aggregate({
$group: {
_id: {
"userid": "$userid",
"placementid": "$placementid"
},
"sum": {
$sum: {
$cond: {
if: { $eq: [ "$type", "start" ] },
then: 1, // +1 for start
else: -1 // -1 for anything else
}
}
}
}
}, {
$match: {
"sum": { $ne: 0 } // only return the non matching-up ones
}
})