Mongodb: $unwind and compute $avg - mongodb

I have documents storing IoT data.
Following MongoDB schema design best practices for IoT, I came to documents with the following structure:
"_id" : "AQ106_2020-09-12T09",
"date" : "2020-09-12T09:00:00.000Z",
"station" : {
"name" : "AQ106",
"loc" : {
"type" : "Point",
"coordinates" : [
14.339263,
40.814224
]
},
"properties" : {
}
},
"samples" : [
{
"t" : ISODate("2020-09-12T11:02:00.000+02:00"),
"data" : {
"pm1_mg_m3" : 2.7,
"pm2_5_mg_m3" : 4.6,
"pm10_mg_m3" : 12,
"P0" : 152,
"P1" : 16,
"P2" : 4.7,
"P3" : 0.8,
"P4" : 0.86,
"P5" : 0.6,
"P6" : 0.28,
"P7" : 0.152,
"P8" : 0.094,
"P9" : 0.092,
"P10" : 0.019,
"P11" : 0,
"P12" : 0,
"P13" : 0.0188,
"P14" : 0,
"P15" : 0,
"P16" : 0,
"P17" : 0,
"P18" : 0,
"P19" : 0,
"P20" : 0,
"P21" : 0,
"P22" : 0,
"P23" : 0,
"temp_celsius" : 32.59,
"humRelPercent" : 34,
"press_mBar" : 1010.79,
"CO2mA" : 4,
"NO2_WE_mV" : 226.419,
"NO2_AE_mV" : 229.553,
"OX_WE_mV" : 252.287,
"OX_AE_mV" : 220.419,
"CO_WE_mV" : 509.077,
"AE_WE_mV" : 348.51,
"batt_V" : 13.5,
"source_V" : 17.6
}
},
.... additional arrays
}
Now I want to compute hourly or daily averages (or another metric) to populate a new collection with only summarised data.
I coded the following solution for hourly means:
db.collection.aggregate([{$match: {
'station.name':'AQ104'
}}, {$unwind: {
path: "$samples"
}}, {$group: {
_id: "$date",
P0: {
$avg : "$samples.data.P0"
},
temp:{
$avg:"$samples.data.temp_celsius"
}
}}])
This works but I need to manually create a field for each property in samples.data in the original document and that's tedious.
Moreover, how to group both by date and station.name?
You can find a working example here.
Thanks.

Let's start with the easy question, how to group on multiple fields? With a simple syntax change:
{
$group: {
_id: {
date: "$date",
station: "$station.name"
}
}
Now for the second question this will be a bit more tedious. Mongo does not support "merging" objects by their keys with custom logic (in this case $avg). So we will have to convert the object to an array. unwind it, calculate the average per field and eventually group to restore the required structure like so:
db.collection.aggregate([
{
$match: {
"station.name": "AQ106"
}
},
{
$unwind: {
path: "$samples"
}
},
{
$addFields: {
objArr: {
"$objectToArray": "$samples.data"
}
}
},
{
$unwind: "$objArr"
},
{
$group: {
_id: {
date: "$date",
station: "$station.name",
objKey: "$objArr.k"
},
value: {
$avg: "$objArr.v"
}
}
},
{
$addFields: {
data: {
"$arrayToObject": [
[
{
k: "$_id.objKey",
v: "$value"
}
]
]
}
}
},
{
$group: {
_id: {
date: "$_id.date",
station: "$_id.station"
},
data: {
"$mergeObjects": "$data"
}
}
},
{
$replaceRoot: {
newRoot: {
"$mergeObjects": [
"$data",
"$_id"
]
}
}
}
])
MongoPlayground
------- EDIT ---------
For Mongo v4.4+ you can use $accumulator which allows you to execute custom javascript code in your pipeline. I am unsure how this will fare against the native Mongo pipeline in terms of performance in scale.
One thing to note is that I added the initial $addFields stage under the assumption that different samples may have different keys. if this is not the case it is not needed.
db.collection.aggregate([
{
$addFields: {
sampleKeys: {
$reduce: {
input: {
$map: {
input: "$samples",
as: "sample",
in: {
$map: {
input: {
"$objectToArray": "$$sample.data"
},
as: "sampleArrItem",
in: "$$sampleArrItem.k"
}
}
}
},
initialValue: [],
in: {
"$setUnion": [
"$$this",
"$$value"
]
}
}
}
}
},
{
$addFields: {
samples: {
$accumulator: {
init: function(keys){
return keys.map(k => {return {k: {v: 0, c: 0}}});
},
initArgs: ["$sampleKeys"],
accumulateArgs: ["$samples"],
accumulate: function(state, sample){
Object.keys(state).forEach((key) => {
if (key in sample.data) {
state[key].v += sample.data[key];
state[key].c++;
};
});
return state;
},
merge: function(state1, state2){
Object.keys(state1).forEach((key) => {
state1[key].v += state2[key].v;
state1[key].c += state2[key].c;
});
return state1;
},
lang: "js"
}
}
}
},
{
$replaceRoot: {
newRoot: {
$mergeObject: [
"$samples",
{station: "$station.name", date: "$date"},
]
}
}
}
])

I partially resolved my question in terms of grouping by multiple fields (MongoDB documentation was not so clear at this regard, in my opinion)
db.collection.aggregate([
{
$unwind: {
path: "$samples"
}
},
{
$group: {
_id: {
date: "$date",
station: "$station.name"
},
P0: {
$avg: "$samples.data.P0"
},
temp: {
$avg: "$samples.data.temp_celsius"
}
}
}
])
Here the updated working example.
Thanks to Tom Slabbaert, I solved my question with the following query:
db.collection.aggregate([
{
$unwind: {
path: "$samples"
}
},
{
$addFields: {
objArr: {
"$objectToArray": "$samples.data"
}
}
},
{
$unwind: "$objArr"
},
{
$group: {
_id: {
date: "$date",
station: "$station",
objKey: "$objArr.k"
},
value: {
$avg: "$objArr.v"
}
}
},
{
$addFields: {
data: {
"$arrayToObject": [
[
{
k: "$_id.objKey",
v: "$value"
}
]
]
}
}
},
{
$group: {
_id: {
date: "$_id.date",
station: "$_id.station"
},
data: {
"$mergeObjects": "$data"
}
}
},
{
"$project": {
_id: "$_id.date",
station: "$_id.station",
data: 1
}
}
])
Here
I wonder if it is possible to simplify the above solution using the new $function operator.
Thanks.

Related

How to perform conditional arithmetic operations in MongoDB

I've following schema
{
"_id" : ObjectId("xxxxx"),
"updatedAt" : ISODate("2022-06-29T13:10:36.659+0000"),
"createdAt" : ISODate("2022-06-29T08:06:51.264+0000"),
"payments" : [
{
"paymentId" : "xxxxx",
"paymentType" : "charge",
"paymentCurrency" : "PKR",
"paymentMode" : "cash",
"paymentTotal" : 13501.88,
"penalties" : 100
},
{
"paymentId" : "ccccc",
"paymentType" : "refund",
"paymentCurrency" : "PKR",
"paymentMode" : "",
"paymentTotal" : 13061.879999999997,
"penalties" : 430.0
}
]
}
I want to get all paymentTotal sum if paymentType is 'charge' else subtract the paymentTotal from the sum if paymentType is other than charge, i.e refund also subtract penalties from total sum
I've tried following query which is not working giving me syntax error like,
A syntax error was detected at the runtime. Please consider using a higher shell version or use the syntax supported by your current shell.
xxx
Blockquote
db.getCollection("booking").aggregate([
{
$match: {
createdAt : {
"$gte":ISODate("2022-06-28"),
"$lte":ISODate("2022-06-30"),
}
}
},
{$unwind: '$payments'},
{
"$group":{
"_id" : "$_id",
"total" : {
$sum: "$payments.paymentTotal"
}
},
},
{
$project :
{
"grandTotal":{
$cond:{
if:{$eq:["$payments.paymentType", "charge"]},
then:{$add : {"$total,$payments.paymentTotal"}},
else:{ $subtract: {"$total,$payments.paymentTotal"}}
}
}
}
}
]);
I've tried, Condition and Switch statements but both are not working, or maybe I'm using them wrong.
You can use $reduce for it:
db.collection.aggregate([
{
$match: {
createdAt: {
$gte: ISODate("2022-06-28T00:00:00.000Z"),
$lte: ISODate("2022-06-30T00:00:00.000Z")
}
}
},
{
$project: {
grandTotal: {
$reduce: {
input: "$payments",
initialValue: 0,
in: {
$cond: [
{$eq: ["$$this.paymentType", "charge"]},
{$add: ["$$this.paymentTotal", "$$value"]},
{$subtract: ["$$value", "$$this.paymentTotal"]}
]
}
}
}
}
}
])
See how it works on the playground example
You can do simple math:
db.collection.aggregate([
{
$set: {
grandTotal: {
$map: {
input: "$payments",
in: {
$multiply: [
"$$this.paymentTotal",
{ $cond: [{ $eq: ["$$this.paymentType", "charge"] }, 1, -1] }
]
}
}
}
}
},
{ $set: { grandTotal: { $sum: "$grandTotal" } } }
])

Out new collection with sequence numeric id after stages in mongodb aggregate

I'm trying to out a collection, after some stages in the MongoDB aggregation pipeline.
the out collection needs to contain an id with sequence numeric id (1,2,3,4 etc...).
db.getCollection('MY_COLLECTION').aggregate([{
$addFields: {
"dataEntries.targetCol1": "$dataEntries.col1",
"dataEntries.targetCol2": "$dataEntries.col2"
}
},
{
$project: {
"_id": 0,
"dataEntries.col1": 0,
"dataEntries.col2": 0,
"dataEntries.col3": 0,
"dataEntries.col4": 0
}
},
{
$unionWith: {
coll: "MY_COLLECTION",
pipeline: [{
$addFields: {
"dataEntries.targetCol1": "$dataEntries.col3",
"dataEntries.targetCol2": "$dataEntries.col4"
}
},
{
$project: {
"_id": 0,
"dataEntries.col1": 0,
"dataEntries.col2": 0,
"dataEntries.col3": 0,
"dataEntries.col4": 0
}
}
]
}
},
{
$out: "test_out"
}
])
The result collection contains an objectId as the id.
/* 1 */
{
"_id" : ObjectId("6239cb749482cf6b13248a80"),
"dataEntries" : {
"targetCol1" : "101-A",
"targetCol2" : "101"
}
}
/* 2 */
{
"_id" : ObjectId("6239cb749482cf6b13248a81"),
"dataEntries" : {
"targetCol1" : "101-B",
"targetCol2" : "101"
}
}
There is a way to set the id to sequence numeric counter for every document?
The expected results:
/* 1 */
{
"_id" : 1,
"dataEntries" : {
"targetCol1" : "101-A",
"targetCol2" : "101"
}
}
/* 2 */
{
"_id" : 2,
"dataEntries" : {
"targetCol1" : "101-B",
"targetCol2" : "101"
}
}
In MongoDB 5.0 it is very simple:
db.collection.aggregate([
{
$setWindowFields: {
sortBy: { _id: 1, },
output: { _id: { $documentNumber: {} } }
}
}
])
In earlier versions, it is a little more to do:
db.collection.aggregate([
{
$group: {
_id: null, data: { $push: "$$ROOT" }
}
},
{
$set: {
data: {
$map: {
input: "$data",
in: {
$mergeObjects: ["$$this",
{ _id: {$add: [{ $indexOfArray: ["$data._id", "$$this._id"] }, 1]} }
]
}
}
}
}
},
{ $unwind: "$data" },
{ $replaceWith: "$data" }
])

Mongodb project results into single document

I have a collection like this:
{
"_id" : ObjectId("5f4e81f1da5ea3cb7c248a8f"),
"type" : "TYPE_1",
"updateTime" : ISODate("2020-08-24T11:10:43.219+0000")
}
{
"_id" : ObjectId("5f4e8206da5ea3cb7c248a90"),
"type" : "TYPE_1",
"updateTime" : ISODate("2020-09-24T11:10:43.219+0000")
}
{
"_id" : ObjectId("5f4e821fda5ea3cb7c248a91"),
"type" : "TYPE_2",
"updateTime" : ISODate("2020-09-25T11:10:43.219+0000")
}
I want to know how many documents there are of each type and also obtain the date of the last global modification. For now I can get these results like this:
db.getCollection("test").aggregate(
// Pipeline
[
// Stage 1
{
$group: {
_id : "$type",
count: { $sum: 1 },
lastUpdate: { "$max": "$updateTime" }
}
},
// Stage 2
{
$sort: {
lastUpdate : -1
}
},
]
);
With which I get the results this way:
{
"_id" : "TYPE_2",
"count" : 1.0,
"lastUpdate" : ISODate("2020-09-25T11:10:43.219+0000")
}
{
"_id" : "TYPE_1",
"count" : 2.0,
"lastUpdate" : ISODate("2020-09-24T11:10:43.219+0000")
}
So I have both the sum of each document and the last modification (thanks to the sort).
But I would like to project and get the results like this, in a single result document:
{
"type1" : 2.0,
"type2" : 1.0,
"lastUpdate" : ISODate("2020-09-25T11:10:43.219+0000")
}
#varman's answer is good, this is just in different way,
$group you have already done by your self
$group create types array to combine all documents
$replaceWith to replace root with field types to convert $arrayToObject
db.collection.aggregate([
{
$group: {
_id: "$type",
count: { $sum: 1 },
lastUpdate: { $max: "$updateTime" }
}
},
{
$group: {
_id: null,
types: {
$push: {
k: "$_id",
v: "$count"
}
},
lastUpdate: { $max: "$lastUpdate" }
}
},
{
$replaceWith: {
$mergeObjects: [
{ lastUpdate: "$lastUpdate" },
{ $arrayToObject: "$types" }
]
}
}
])
Playground
You can use following stages after your stage.
{
$group: {
_id: null,
data: {
$push: {
type: "$_id",
count: "$count"
}
},
lastUpdate: {
$first: "$lastUpdate"
}
}
},
{
$project: {
data: {
$arrayToObject: {
$map: {
input: "$data",
in: {
k: "$$this.type",
v: "$$this.count"
}
}
}
},
lastUpdate: 1
}
},
{
$addFields: {
"data.lastUpdate": "$lastUpdate"
}
},
{
"$replaceRoot": {
"newRoot": "$data"
}
}
Working Mongo playground

Mongo rank calculations based on count

I am trying the mongo rank calculation based on count and mentioned in below db schema. I am not getting the expecting results. Anyone help to resolve this?
Mongo Query:
db.company.aggregate([
{
"$group": {
"_id": {
"name1": "$name1",
"name2": "$name2",
},
"expanded": {
"$push": {
"name1": "$name1",
"name2": "$name2",
}
},
"count": { "$sum": 1 }
}
},
{ "$sort": { "count": -1 } },
{
$unwind: {
path: '$expanded',
includeArrayIndex: 'count'
}
}
]);
Expecting results like
Name|Count|Rank
Google|3|1
FB|2|2
Yahoo|1| 3
DB Schema :
{
"_id" : 1.0,
"name1" : "Yahoo",
"name2" : "Google",
"salary" : 1000.0
}
/* 2 */
{
"_id" : 2.0,
"name1" : "FB",
"name2" : "Google",
"salary" : 2000.0
}
/* 3 */
{
"_id" : 3.0,
"name1" : "Google",
"name2" : "FB",
"salary" : 1500.0
}
It seems like you should count name1 and name2 separately so you can create a temporary 2-element array and then run $unwind on that array. Additionally to get rank you have to $group by null to get single array of all groups, try:
db.collection.aggregate([
{
$project: {
key: [ "$name1", "$name2" ]
}
},
{
$unwind: "$key"
},
{
$group: {
_id: "$key",
count: { $sum: 1 }
}
},
{
$sort: {
count: -1
}
},
{
$group: {
_id: null,
groups: { $push: "$$ROOT" }
}
},
{
$unwind: {
path: '$groups',
includeArrayIndex: 'rank'
}
},
{
$project: {
_id: 0,
name: "$groups._id",
rank: { $add: [ "$rank", 1 ] },
count: "$groups.count"
}
}
])
Mongo Playground
try this
db.company.aggregate([
{
$group: {
_id:null,
names1: {$push: "$name1"},
names2: {$push:"$name2"},
}
},
{
$project: {
_id: 0,
names:{$concatArrays: ["$names1", "$names2"]}
}
},
{$unwind: "$names"},
{$sortByCount: "$names"},
{$addFields:{name: "$_id"}},
{
$group : {
_id: null,
records : { $push : {count : "$count", name : "$name"}}
}
},
{
$project: {
total_docs: {$size: "$records"},
records: 1
}
},
{$unwind: "$records"},
{
$project: {
_id: 0,
name: "$records.name",
count:"$records.count",
rank: {
$add:[
{
$subtract:["$total_docs", "$records.count"]
}, 1]
}
}
}])

Find duplicate inside an array mongodb

I have a Mongo Collection called Users and structured structured like this
{
_id: '1234aaa',
profile: {
Organizations: [A,B,C,A,B,A]
}
},
{
_id: '1234bbb',
profile: {
Organizations: [A,B,C]
}
},
{
_id: '1234ccc',
profile: {
Organizations: [A,B,C,C]
}
}
How do I return a list of all the documents in my collection ONLY if they have a duplicate value under profile.organizations.
The expected result would be:
DupesUsers: {
{
User: '1234aaa,
Dupes: [A,B]
},
{
User: '1234ccc,
Dupes: [C]
},
}
I've tried using Aggreagte:
db.getCollection('users').aggregate(
{$unwind: "$profile.organizations"},
{ $project: {_id: '$_id', org: '$profile.organizations'} },
{ $group: {
_id: null,
occurances: {$push: {'org': '$_id', count: '$count'}}
}
}
);
but I just can't seem to wrap my head around it.
You're not far off just some minor tweaks needed:
db.getCollection("users").aggregate(
[
{
"$unwind" : "$profile.organizations"
},
{
"$group" : {
"_id" : {
"dup" : "$profile.organizations",
"id" : "$_id"
},
"count" : {
"$sum" : 1.0
}
}
},
{
"$match" : {
"count" : {
"$gt" : 1.0
}
}
},
{
"$group" : {
_id: "$_id.id",
Dupes: {$push: "$_id.dup"}
}
}
],
);
You can use below aggregation
db.collection.aggregate([
{ "$project": {
"Dupes": {
"$filter": {
"input": { "$setUnion": ["$profile.Organizations"] },
"as": "s",
"cond": {
"$gt": [
{ "$size": {
"$filter": {
"input": "$profile.Organizations",
"cond": { "$eq": ["$$this", "$$s"] }
}
}},
1
]
}
}
}
}}
])