Mongodb Muliple averages based on a different column - mongodb

Using mongodb aggregate, is there a way to have the query return Weight average on all Scale , an average of Scale 1 , an average of Scale 2 all all returned in the same query?
This is an example of an entry in my data set
{
"Profile" : "P1",
"AvgWeight" : 639,
"Time" : "2017-04-14T05:17:42.000Z",
"Scale" : 1,
"Weight" : 1504,
"Target" : 680
}
My query that I am currently running that is averaging the weight accrost all scales ( Might not help, but better to have more info )
[{
"$match": {
"Time": {
"$gt": moment(start).format("YYYY-MM-DD HH:mm:ss"),
"$lt": moment(end).format("YYYY-MM-DD HH:mm:ss")
}
}
},
{
"$group": {
"_id": {
"hour": {
"$hour": "$Time"
},
"day": {
"$dayOfYear": "$Time"
},
"interval": {
"$add": [{
"$multiply": [{
"$minute": "$Time"
}]
},
{
"$multiply": [{
"$hour": "$Time"
},
100
]
},
{
"$multiply": [{
"$dayOfYear": "$Time"
},
10000
]
},
{
"$multiply": [{
"$year": "$Time"
},
10000000
]
}
]
}
},
"time": {
"$first": "$Time"
},
"avgW": {
"$avg": "$AvgWeight"
},
"avgWe": {
"$avg": "$Weight"
},
"avgTarget": {
"$avg": "$Target"
}
}
}, {
"$sort": {
"Time": -1
}
}
]
Adding Expected Response SOmething like
[
{
"_id : {"hour":1,"day":105,"interval":20971050122},
"time":"2017-04-15T01:22:58.000Z",
"avgW":646,
"avgWe":1577,
"avgTarget":680 ,
"Scale1" : 100 ,
"Scale2" : 120
} ,
{ "_id":{"hour":1,"day":105,"interval":20771050122},
"time":"2017-04-15T01:22:55.000Z",
"avgW":646,
"avgWe":1335,
"avgTarget":680 ,
"Scale1" : 100 ,
"Scale2" : 120 }
]
But if it is a little different I can handle it, as long all the scales are in the same parent object ( it would be to cpu intensive to post process them to link up the matching groups )

You can split the first group into two groups.
First group to calculate weight avg for all scales and second group to do rest of avgs.
Something like:
[{
"$match": {
"Time": {
"$gt": moment(start).format("YYYY-MM-DD HH:mm:ss"),
"$lt": moment(end).format("YYYY-MM-DD HH:mm:ss")
}
}
}, {
"$group": {
"_id": {
"scale": "$Scale",
"hour": {
"$hour": "$Time"
},
"day": {
"$dayOfYear": "$Time"
},
"interval": {
"$add": [{
"$multiply": [{
"$minute": "$Time"
}]
},
{
"$multiply": [{
"$hour": "$Time"
},
100
]
},
{
"$multiply": [{
"$dayOfYear": "$Time"
},
10000
]
},
{
"$multiply": [{
"$year": "$Time"
},
10000000
]
}
]
}
},
"time": {
"$first": "$Time"
},
"scaleAvg: {
"$avg": "$Weight"
}
}
}, {
"$group": {
"_id": {
"hour": "$_id.hour",
"day": "$_id.day",
"interval": "$_id.interval"
},
"time": {
"$first": "$time"
},
"avgW": {
"$avg": "$AvgWeight"
},
"avgWe": {
"$avg": "$Weight"
},
"avgTarget": {
"$avg": "$Target"
},
"scaleAvgs": {
"$push": {
"scale": "$_id.scale",
"scaleAvg": "$scaleAvg"
}
}
}
}, {
"$sort": {
"time": -1
}
}]

Related

Mongodb plus and minus in one query (subtract two query results)

I have the following collection:
[
{
"type": "debit",
"amount": 10
},
{
"type": "debit",
"amount": 20
},
{
"type": "credit",
"amount": 5
},
]
I need to sum all documents with type debit and minus sum of credits.
how can I handle this with mongodb?
so my expected result is 25
db.collection.aggregate([
{
"$group": {
"_id": "$type",
"field": {
$sum: {
"$cond": {
"if": {
"$eq": [
"$type",
"debit"
]
},
"then": "$amount",
"else": {
"$multiply": [
"$amount",
-1
]
}
}
}
}
}
},
{
"$group": {
"_id": null,
"result": {
"$sum": "$field"
}
}
}
])
https://mongoplayground.net/p/cfTixd1Oo_z

need to convert the data in another format

We have Data:
[
{
"_id": ObjectId("5f87e152219aaf1f9404ef3f"),
"parameterId": "5f914ca2679bae721d38410b",
"average": 574998.153846154,
"count": 26.0,
"date": ISODate("2020-09-08T18:30:00.000Z"),
"_class": "org.nec.iotplatform.entities.RawData"
},
{
"_id": ObjectId("5f87e1e2219aaf1f9404eff5"),
"parameterId": "5f914ca2679bae721d38410b",
"average": 494217.606225681,
"count": 1285.0,
"date": ISODate("2020-09-09T18:30:00.000Z"),
"_class": "org.nec.iotplatform.entities.RawData"
}
]
I have query which I am executing on above data and then getting the result as below the query
db.collection.aggregate([
{
"$project": {
"year": {
"$year": "$date"
},
"month": {
"$month": "$date"
},
"dayOfMonth": {
"$dayOfMonth": "$date"
},
"average": "$average",
"count": "$count",
"Symbol": 1
}
},
{
"$group": {
"_id": {
year: "$year",
month: "$month",
dayOfMonth: "$dayOfMonth"
},
"data": {
"$push": "$$ROOT"
}
}
},
{
"$project": {
"average": {
"$divide": [
{
"$reduce": {
"input": "$data",
"initialValue": 0,
"in": {
"$add": [
"$$value",
{
"$multiply": [
"$$this.count",
"$$this.average"
]
}
]
}
}
},
{
$reduce: {
input: "$data",
initialValue: 0,
in: {
"$add": [
"$$value",
"$$this.count"
]
}
}
}
]
}
}
}
])
I am getting output :
[{
"_id" : {
"year" : 2020,
"month" : 9,
"dayOfMonth" : 8
},
"average" : 574998.153846154
},
{
"_id" : {
"year" : 2020,
"month" : 9,
"dayOfMonth" : 9
},
"average" : 494217.606225681
}]
But I need to format the result data like this. by adding the date like this:
{
2020-09-08T18:30:00.000Z : 574998.153846154,
2020-09-09T18:30:00.000Z : 494217.606225681
}
Thanks in advance.
You can use $dateFromString to create the date you want.
Also, you need $concat and $toString to parse the numbers to string and concat into a single string.
After that, using $group you can get the all values you need in the same array. And how you want set the date as KEY, is neccesary create fields k and v and parse again to string.
With the values together, using $arrayToObject you can cerate the schema you want date: average and use $replaceRoot to get only the values at top level.
To do this you need to add this query at the end of your aggregation.
{
"$set": {
"date": { "$dateFromString": { "dateString": {
"$concat": [
{ "$toString": "$_id.dayOfMonth" }, "-",
{ "$toString": "$_id.month" }, "-",
{ "$toString": "$_id.year" }
] },
"format": "%d-%m-%Y", "timezone": "Europe/Madrid"
} } }
},
{
"$group": {
"_id": null,
"date": { "$push": { "k": { "$toString": "$date" }, "v": "$average" } }
}
},
{
"$replaceRoot": { "newRoot": { "$arrayToObject": "$date" } }
}
This query add a new field called date like this:
"date": ISODate("2020-09-08T04:00:00Z")
I've used Europe/Madrid as timezone but you can choose you want to get your desired date.
Example here.
The output is:
{
"2020-09-07T22:00:00.000Z": 574998.153846154,
"2020-09-08T22:00:00.000Z": 494217.606225681
}
Using America/New_York as timezone:
{
"2020-09-08T04:00:00.000Z": 574998.153846154,
"2020-09-09T04:00:00.000Z": 494217.606225681
}

How to group data by every hour

How do I get counts data grouped by every hour in 24 hours even if data is not present i.e. IF 0 will select 0
MonogDB 3.6
Input
[
{
"_id": ObjectId("5ccbb96706d1d47a4b2ced4b"),
"date": "2019-05-03T10:39:53.108Z",
"id": 166,
"update_at": "2019-05-03T02:45:36.208Z",
"type": "image"
},
{
"_id": ObjectId("5ccbb96706d1d47a4b2ced4c"),
"date": "2019-05-03T10:39:53.133Z",
"id": 166,
"update_at": "2019-05-03T02:45:36.208Z",
"type": "image"
},
{
"_id": ObjectId("5ccbb96706d1d47a4b2ced4d"),
"date": "2019-05-03T10:39:53.180Z",
"id": 166,
"update_at": "2019-05-03T20:45:36.208Z",
"type": "image"
},
{
"_id": ObjectId("5ccbb96706d1d47a4b2ced7a"),
"date": "2019-05-10T10:39:53.218Z",
"id": 166,
"update_at": "2019-12-04T10:45:36.208Z",
"type": "image"
},
{
"_id": ObjectId("5ccbb96706d1d47a4b2ced7b"),
"date": "2019-05-03T10:39:53.108Z",
"id": 166,
"update_at": "2019-05-05T10:45:36.208Z",
"type": "image"
},
{
"_id": ObjectId("5ccbb96706d1d47a4b2cedae"),
"date": "2019-05-03T10:39:53.133Z",
"id": 166,
"update_at": "2019-05-05T10:45:36.208Z",
"type": "image"
},
{
"_id": ObjectId("5ccbb96706d1d47a4b2cedad"),
"date": "2019-05-03T10:39:53.180Z",
"id": 166,
"update_at": "2019-05-06T10:45:36.208Z",
"type": "image"
},
{
"_id": ObjectId("5ccbb96706d1d47a4b2cedab"),
"date": "2019-05-10T10:39:53.218Z",
"id": 166,
"update_at": "2019-12-06T10:45:36.208Z",
"type": "image"
}
]
Implementation
db.collection.aggregate({
$match: {
update_at: {
"$gte": "2019-05-03T00:00:00.0Z",
"$lt": "2019-05-05T00:00:00.0Z"
},
id: {
"$in": [
166
]
}
}
},
{
$group: {
_id: {
$substr: [
"$update_at",
11,
2
]
},
count: {
"$sum": 1
}
},
},
{
$project: {
_id: 0,
hour: "$_id",
count: "$count"
}
},
{
$sort: {
hour: 1
}
})
Actual Output:
{
"count": 2,
"hour": "02"
},
{
"count": 1,
"hour": "20"
}
My expectation code show 24 hours event data is 0 or null and convert from example "02" as "02 AM" , "13" as "01 PM":
Expected Output
{
"count": 0,
"hour": "01" // 01 AM
},
{
"count": 2,
"hour": "02"
},
{
"count": 0,
"hour": "03"
},
{
"count": 0,
"hour": "04"
},
{
"count": 0,
"hour": "05"
},
{
"count": 1,
"hour": "20" // to 08 pm
}
Try this solution:
Explanation
We group by hour to count how many images are uploaded.
Then, we add extra field hour to create time interval (if you had v4.x, there is a better solution).
We flattern hour field (will create new documents) and split first 2 digits to match count and split last 2 digits to put AM / PM periods.
db.collection.aggregate([
{
$match: {
update_at: {
"$gte": "2019-05-03T00:00:00.0Z",
"$lt": "2019-05-05T00:00:00.0Z"
},
id: {
"$in": [
166
]
}
}
},
{
$group: {
_id: {
$substr: [
"$update_at",
11,
2
]
},
count: {
"$sum": 1
}
}
},
{
$addFields: {
hour: [
"0000",
"0101",
"0202",
"0303",
"0404",
"0505",
"0606",
"0707",
"0808",
"0909",
"1010",
"1111",
"1212",
"1301",
"1402",
"1503",
"1604",
"1705",
"1806",
"1907",
"2008",
"2109",
"2210",
"2311"
]
}
},
{
$unwind: "$hour"
},
{
$project: {
_id: 0,
hour: 1,
count: {
$cond: [
{
$eq: [
{
$substr: [
"$hour",
0,
2
]
},
"$_id"
]
},
"$count",
0
]
}
}
},
{
$group: {
_id: "$hour",
count: {
"$sum": "$count"
}
}
},
{
$sort: {
_id: 1
}
},
{
$project: {
_id: 0,
hour: {
$concat: [
{
$substr: [
"$_id",
2,
2
]
},
{
$cond: [
{
$gt: [
{
$substr: [
"$_id",
0,
2
]
},
"12"
]
},
" PM",
" AM"
]
}
]
},
count: "$count"
}
}
])
MongoPlayground
There's no "magic" solution, you'll have to hardcode it into your aggregation:
Heres an example using Mongo v3.2+ syntax with some $map and $filter magic:
db.collection.aggregate([
{
$match: {
update_at: {
"$gte": "2019-05-03T00:00:00.0Z",
"$lt": "2019-05-05T00:00:00.0Z"
},
id: {"$in": [166]}
}
},
{
$group: {
_id: {$substr: ["$update_at", 11, 2]},
count: {"$sum": 1}
}
},
{
$group: {
_id: null,
hours: {$push: {hour: "$_id", count: "$count"}}
}
},
{
$addFields: {
hours: {
$map: {
input: {
$concatArrays: [
"$hours",
{
$map: {
input: {
$filter: {
input: ["00", "01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23"],
as: "missingHour",
cond: {
$not: {
$in: [
"$$missingHour",
{
$map: {
input: "$hours",
as: "hourObj",
in: "$$hourObj.hour"
}
}
]
}
}
}
},
as: "missingHour",
in: {hour: "$$missingHour", count: 0}
}
}
]
},
as: "hourObject",
in: {
count: "$$hourObject.count",
hour: {
$cond: [
{$eq: [{$substr: ["$$hourObject.hour", 0, 1]}, "0"]},
{$concat: ["$$hourObject.hour", " AM"]},
{
$concat: [{
$switch: {
branches: [
{case: {$eq: ["$$hourObject.hour", "13"]}, then: "1"},
{case: {$eq: ["$$hourObject.hour", "14"]}, then: "2"},
{case: {$eq: ["$$hourObject.hour", "15"]}, then: "3"},
{case: {$eq: ["$$hourObject.hour", "16"]}, then: "4"},
{case: {$eq: ["$$hourObject.hour", "17"]}, then: "5"},
{case: {$eq: ["$$hourObject.hour", "18"]}, then: "6"},
{case: {$eq: ["$$hourObject.hour", "19"]}, then: "7"},
{case: {$eq: ["$$hourObject.hour", "20"]}, then: "8"},
{case: {$eq: ["$$hourObject.hour", "21"]}, then: "9"},
{case: {$eq: ["$$hourObject.hour", "22"]}, then: "10"},
{case: {$eq: ["$$hourObject.hour", "23"]}, then: "11"},
],
default: "None"
}
}, " PM"]
}
]
}
}
}
}
}
},
{
$unwind: "$hours"
},
{
$project: {
_id: 0,
hour: "$hours.hour",
count: "$hours.count"
}
},
{
$sort: {
hour: 1
}
}
]);
A short explanation of the $addFields stage: we first add hours that we're missing, we then merge the two arrays (of the original found hours and the "new" missing hours), finally we convert to the required output ("01" to "01 AM").
If you're using Mongo v4+ I recommend you change the $group _id stage to use $dateFromString as its more consistent.
_id: {$hour: {$dateFromString: {dateString: "$update_at"}}}
If you do do that, you'll have to update the $filter and $map section to use numbers and not strings and eventually using $toString to cast into the format you want, hence the v4+ requirement.
You should store date values as Date objects instead of strings. I would do the formatting like this:
db.collection.aggregate(
[
{ $match: { ... } },
{
$group: {
_id: { h: { $hour: "$update_at" } },
count: { $sum: 1 }
}
},
{
$project: {
_id: 0,
hour: {
$switch: {
branches: [
{ case: { $lt: ["$_id.h", 10] }, then: { $concat: ["0", { $toString: "$_id.h" }, " AM"] } },
{ case: { $lt: ["$_id.h", 13] }, then: { $concat: [{ $toString: "$_id.h" }, " AM"] } },
{ case: { $lt: ["$_id.h", 22] }, then: { $concat: ["0", { $toString: { $subtract: ["$_id.h", 12] } }, " PM"] } },
{ case: { $lt: ["$_id.h", 24] }, then: { $concat: [{ $toString: { $subtract: ["$_id.h", 12] } }, " PM"] } }
]
}
},
hour24: "$_id.h",
count: 1
}
},
{ $sort: { hour24: 1 } }
])
As non-American I am not familiar with AM/PM rules, esp. for midnight and midday but I guess you get the principle.
Here is the query you can test it out, for MongoDB 4.0+
i will be improving query and update
const query = [{
$match: {
update_at: {
"$gte": ISODate("2019-05-03T00:00:00.0Z"),
"$lt": ISODate("2019-05-05T00:00:00.0Z")
},
id: {
"$in": [
166
]
}
}
},
{
$group: {
_id: { $hour: "$update_at" },
count: {
"$sum": 1
}
},
},
{
$addFields: {
hourStr: { $toString: { $cond: { if: { $gte: ["$_id", 12] }, then: { $subtract: [12, { $mod: [24, '$_id'] }] }, else: "$_id" } } },
}
},
{
$project: {
formated: { $concat: ["$hourStr", { $cond: { if: { $gt: ["$_id", 12] }, then: " PM", else: " AM" } }] },
count: "$count",
hour: 1,
}
}]
If you want to output in Indian Time formate. then below code work!
const query = [
{
$match: {
update_at: {
"$gte": ISODate("2019-05-03T00:00:00.0Z"),
"$lt": ISODate("2019-05-05T00:00:00.0Z")
},
id: {
"$in": [
166
]
}
}
},
{
$project: {
"h": { "$hour": { date: "$update_at", timezone: "+0530" } },
}
},
{
$group:
{
_id: { $hour: "$h" },
count: { $sum: 1 }
}
}
];

mongodb aggregation with multiple sub groups

I have a collection with documents that look similar to this:
[
{
"_id": ObjectId("..."),
"date": ISODate("..."),
"type": "TypeA",
"color": "ColorA",
"soldFor": 12.15
},
{
"_id": ObjectId("..."),
"date": ISODate("..."),
"type": "TypeA",
"color": "ColorB",
"soldFor": 13.15
},
{
"_id": ObjectId("..."),
"date": ISODate("..."),
"type": "TypeB",
"color": "ColorA",
"soldFor": 12.15
},
{
"_id": ObjectId("..."),
"date": ISODate("..."),
"type": "TypeB",
"color": "ColorB",
"soldFor": 12.15
}
]
I know that this is not a good way to store such information, but unfortunately I have no influence in that.
What I need to get out of the collection is something like this:
[
2017: {
typeA: {
colorA: {
sum: 125.00
},
colorB: {
sum: 110.00
}
},
typeB: {
colorA: {
sum: 125.000
}
}
},
2016: {
typeA: {
colorB: {
sum: 125.000
}
}
}
]
At the moment I have two group stages that give me everything grouped by year, but I have no clue how to get the two other sub-groups. Building the sum would be a nice to have, but I am certain that I can figure out how that would be done in a group.
So far my pipeline looks like this:
[
{
$group: {
_id: { type: '$type', color: '$color', year: { $year: '$date' } },
docs: {
$push: '$$ROOT'
}
}
},
{
$group: {
_id: { year: '$_id.year' },
docs: {
$push: '$$ROOT'
}
}
}
]
which results in something like this:
[
{
"_id": {
"year": 2006
},
"docs": {
"_id": {
"type": "typeA",
"color": "colorA",
"year": 2006
},
"docs": [
{
... root document
}
]
}
},
{
"_id": {
"year": 2016
},
"docs": [
{
"_id": {
"type": "typeA",
"color": "colorB",
"year": 2016
},
"docs": [
{
... root document
}
]
}
... more docs with three keys in id
]
}
]
Help is much appreciated!
Using a cohort of operators found in MongoDB 3.4.4 and newer, i.e. $addFields, $arrayToObject and $replaceRoot, you can compose a pipeline like the following to get the desired result:
[
{ "$group": {
"_id": {
"year": { "$year": "$date" },
"type": "$type",
"color": "$color"
},
"count": { "$sum": "$soldFor" }
} },
{ "$group": {
"_id": {
"year": "$_id.year",
"type": "$_id.type"
},
"counts": {
"$push": {
"k": "$_id.color",
"v": { "sum": "$count" }
}
}
} },
{ "$addFields": {
"counts": { "$arrayToObject": "$counts" }
} },
{ "$group": {
"_id": "$_id.year",
"counts": {
"$push": {
"k": "$_id.type",
"v": "$counts"
}
}
} },
{ "$addFields": {
"counts": { "$arrayToObject": "$counts" }
} },
{ "$group": {
"_id": null,
"counts": {
"$push": {
"k": { "$substr": ["$_id", 0, -1 ]},
"v": "$counts"
}
}
} },
{ "$replaceRoot": {
"newRoot": {
"$mergeObjects": [
{ "$arrayToObject": "$counts" },
"$$ROOT"
]
}
} },
{ "$project": { "counts": 0 } }
]

Grouping different amounts together in MongoDB

If I have a set of objects each with the same description, but with different amounts.
{
{
"_id": "101",
"description": "DD from my employer1",
"amount": 1000.33
},
{
"_id": "102",
"description": "DD from my employer1",
"amount": 1000.34
},
{
"_id": "103",
"description": "DD from my employer1",
"amount": 1000.35
},
{
"_id": "104",
"description": "DD from employer1",
"amount": 5000.00
},
{
"_id": "105",
"description": "DD from my employer2",
"amount": 2000.33
},
{
"_id": "106",
"description": "DD from my employer2",
"amount": 2000.33
},
{
"_id": "107",
"description": "DD from my employer2",
"amount": 2000.33
}
}
Below, I am able to group them using the description:
{
{
"$group": {
"_id": {
"description": "$description"
},
"count": {
"$sum": 1
},
"_id": {
"$addToSet": "$_id"
}
}
},
{
"$match": {
"count": {
"$gte": 3
}
}
}
}
Is there a way to include all the amounts in the group (_ids: 101, 102, and 103 plus 105,106,107) even if they have a small difference, but exclude the bonus amount, which in the sample above is _id 104?
I don't believe it could be done in a group stage, but is there something that could be done at a later stage that could group _ids 101, 102 and 103 together and exclude _id 104. Basically, I want MongoDB to ignore the small differences in 101, 102, 103 and group them together since the are paychecks coming from the same employer.
I have been working with $stdDevPop, but can't get a solid formula down.
I am looking for a simple array output of just the _ids.
{
"result": [
"101",
"102",
"103",
"105",
"106",
"107"
]
}
You can do this by doing some math on the "amount" to round it down to the nearest 1000 and use that as the grouping _id:
db.collection.aggregate([
{ "$group": {
"_id": {
"$subtract": [
{ "$trunc": "$amount" },
{ "$mod": [
{ "$trunc": "$amount" },
1000
]}
]
},
"results": { "$push": "$_id" }
}},
{ "$redact": {
"$cond": {
"if": { "$gt": [ { "$size": "$results" }, 1 ] },
"then": "$$KEEP",
"else": "$$PRUNE"
}
}},
{ "$unwind": "$results" },
{ "$group": {
"_id": null,
"results": { "$push": "$results" }
}}
])
If your MongoDB is older than 3.2 then you would just need to use a long form with $mod of what $trunc is doing. And if your MongoDB is older than 2.6 then rather than $redact you would $match. So in the longer form this is:
db.collection.aggregate([
{ "$group": {
"_id": {
"$subtract": [
{ "$subtract": [
"$amount",
{ "$mod": [ "$amount", 1 ] }
]},
{ "$mod": [
{ "$subtract": [
"$amount",
{ "$mod": [ "$amount", 1 ] }
]},
1000
]}
]
},
"results": { "$push": "$_id" },
"count": { "$sum": 1 }
}},
{ "$match": { "count": { "$gt": 1 } } },
{ "$unwind": "$results" },
{ "$group": {
"_id": null,
"results": { "$push": "$results" }
}}
])
Either way the output is just the _id values whose amounts grouped to the boundaries with a count more than once.
{ "_id" : null, "results" : [ "105", "106", "107", "101", "102", "103" ] }
You could either add a $sort in there or live with sorting the result array in client code.
db.yourDBNameHere.aggregate( [
{ $match: { "amount" : { $lt : 5000 } } },
{ $project: { _id: 1 } },
])
that will grab the ID only of every transaction less than 5000$.