How do I group and count values by value range in MongoDB - mongodb

I have the following documents in my MongoDB:
_id: ObjectId(...)
'timestamp': 2022-11-03T10:00:00.000+00:00
score: 1
_id: ObjectId(...)
'timestamp': 2022-11-03T09:00:00.000+00:00
score: 3
_id: ObjectId(...)
'timestamp': 2022-11-03T10:00:00.000+00:00
score: 6
_id: ObjectId(...)
'timestamp': 2022-11-03T10:00:00.000+00:00
score: 10
I want to make an aggregation that counts the score within the range of (gte)1-(lt)5 as poor, (gte)5-(lt)7 as ok, (gte)7-(lt)8.5 as good and (gte)8.5-(lte)10 as excellent.
So the result would look like this:
{
"data": [
{
"name": "excellent",
"count": 1
},
{
"name": "good",
"count": 0
},
{
"name": "ok",
"count": 1
},
{
"name": "poor",
"count": 2
}
]
}
How do I achieve that?

If you are fine with a result that only contains the categories that have at least one document, you can do:
// Label every document with its score bucket, then count the documents per label.
// Only buckets that contain at least one document appear in the output.
db.collection.aggregate([
  {$project: {
    // Boundaries follow the question: [1, 5) poor, [5, 7) ok, [7, 8.5) good,
    // [8.5, 10] excellent. Branches are evaluated in order, so each one only
    // needs its upper bound. Scores are assumed to lie within 1..10: anything
    // below 1 would land in "poor" and anything above 10 in "excellent".
    _id: {
      $switch: {
        branches: [
          {case: {$lt: ["$score", 5]}, then: "poor"},
          {case: {$lt: ["$score", 7]}, then: "ok"},
          {case: {$lt: ["$score", 8.5]}, then: "good"}
        ],
        default: "excellent"
      }
    }
  }},
  // One output document per bucket label: {_id: <label>, count: <n>}.
  {$group: {_id: "$_id", count: {$sum: 1}}}
])
Otherwise you need to create all categories:
// Count documents per score bucket, emitting every bucket even when its count is 0.
db.collection.aggregate([
  // Single group (_id: 0) with one conditional counter per bucket.
  // Boundaries follow the question: [1, 5) poor, [5, 7) ok, [7, 8.5) good,
  // [8.5, 10] excellent. The outer buckets are open-ended, so scores are
  // assumed to lie within 1..10.
  {$group: {
    _id: 0,
    excellent: {$sum: {$cond: [{$gte: ["$score", 8.5]}, 1, 0]}},
    good: {$sum: {$cond: [{$and: [{$gte: ["$score", 7]}, {$lt: ["$score", 8.5]}]}, 1, 0]}},
    ok: {$sum: {$cond: [{$and: [{$gte: ["$score", 5]}, {$lt: ["$score", 7]}]}, 1, 0]}},
    poor: {$sum: {$cond: [{$lt: ["$score", 5]}, 1, 0]}}
  }},
  // Drop the constant group key so only the four counters remain.
  {$unset: "_id"},
  // Turn {excellent: n, good: n, ...} into [{k: "excellent", v: n}, ...].
  {$project: {data: {$objectToArray: "$$ROOT"}}},
  // Rename k/v to the requested name/count keys.
  {$project: {
    data: {$map: {
      input: "$data",
      in: {name: "$$this.k", count: "$$this.v"}
    }}
  }}
])
See how it works on the playground example

Related

Filter nested objects

I have a collection of docs like
{'id':1, 'score': 1, created_at: ISODate(...)}
{'id':1, 'score': 2, created_at: ISODate(...)}
{'id':2, 'score': 1, created_at: ISODate(...)}
{'id':2, 'score': 20, created_at: ISODate(...)}
etc.
Does anyone know how to find docs that were created within the past 24hrs where the difference of the score value between the two most recent docs of the same id is less than 5?
So far I can only find all docs created within the past 24hrs:
// Keep only score and created_at, then keep the documents created within the
// last 24 hours (86400000 ms before $$NOW).
[
  {$project: {_id: 0, score: 1, created_at: 1}},
  {$match: {
    $expr: {$gte: ["$created_at", {$subtract: ["$$NOW", 86400000]}]}
  }}
]
Any advice appreciated.
Edit: By the two most recent docs, the oldest of the two can be created more than 24hrs ago. So the most recent doc would be created within the past 24hrs, but the oldest doc could be created over 24hrs ago.
If I understand you correctly, you want something like:
// For each id, take the two most recent documents created within the last
// 24 hours and keep the pair when their scores differ by less than 5.
db.collection.aggregate([
  // 86400000 ms = 24 hours.
  {$match: {$expr: {$gte: ["$created_at", {$subtract: ["$$NOW", 86400000]}]}}},
  // Newest first, so the first two pushed documents are the two most recent.
  {$sort: {created_at: -1}},
  {$group: {_id: "$id", data: {$push: "$$ROOT"}}},
  {$project: {pair: {$slice: ["$data", 0, 2]}, scores: {$slice: ["$data.score", 0, 2]}}},
  {$match: {$expr: {$and: [
    // A difference needs two documents; without this check an id with a
    // single document would pass with a difference of 0.
    {$eq: [{$size: "$scores"}, 2]},
    // Strictly "less than 5", as the question asks.
    {$lt: [{$abs: {$subtract: [{$first: "$scores"}, {$last: "$scores"}]}}, 5]}
  ]}}},
  // scores was only needed for the filter.
  {$unset: "scores"}
])
See how it works on the playground example
EDIT:
According to your comment, one option is:
// For each id, pair the most recent document with the one before it (which may
// be older than 24 hours) and keep the pair when the most recent document is
// from the last day and the two scores differ by less than 5.
db.collection.aggregate([
  // Attach to every document an array holding itself and the next-older
  // document of the same id.
  {$setWindowFields: {
    partitionBy: "$id",
    sortBy: {created_at: -1},
    output: {data: {$push: "$$ROOT", window: {documents: ["current", 1]}}}
  }},
  // Keep the first document seen per id.
  // NOTE(review): this relies on documents arriving newest-first from the
  // previous stage - confirm, or add an explicit $sort before grouping.
  {$group: {
    _id: "$id",
    created_at: {$first: "$created_at"},
    pair: {$first: "$data"}
  }},
  {$match: {$expr: {$and: [
    // The most recent document must have been created within the last day.
    {$gte: ["$created_at", {$dateAdd: {startDate: "$$NOW", unit: "day", amount: -1}}]},
    // There must actually be two documents to compare.
    {$eq: [{$size: "$pair"}, 2]},
    // Strictly "less than 5", as the question asks.
    {$lt: [{$abs: {$subtract: [{$first: "$pair.score"},
                               {$last: "$pair.score"}]}}, 5]}
  ]}}},
  {$project: {_id: 0, pair: 1}}
])
See how it works on the playground example
If I've understood correctly you can try this query:
First, $match to keep only the documents created within the last day.
Then $sort by the date to ensure the most recent are on top.
$group by the id; because the most recent documents are on top, the first two elements of the array built with $push are the two most recent scores.
So now you only need to $sum these two values.
Then filter again, keeping only the results that are less than ($lt) 5.
// For each id, compare the scores of the two most recent documents created in
// the last 24 hours and keep the ids whose scores differ by less than 5.
db.collection.aggregate([
  // 86400000 ms = 24 hours.
  {
    $match: {
      $expr: {
        $gte: ["$created_at", {$subtract: ["$$NOW", 86400000]}]
      }
    }
  },
  // Newest first, so the first two pushed scores are the two most recent.
  {
    "$sort": {"created_at": -1}
  },
  {
    "$group": {
      "_id": "$id",
      "score": {"$push": "$score"}
    }
  },
  {
    "$project": {
      // The question asks for the difference between the two most recent
      // scores, so take the absolute difference of the pair rather than its
      // sum (summing would reject e.g. scores 10 and 11, which differ by 1).
      // With fewer than two scores the second element is missing and the
      // result is null, which the $lt filter below does not match.
      "score": {
        "$let": {
          "vars": {"pair": {"$firstN": {"n": 2, "input": "$score"}}},
          "in": {
            "$abs": {
              "$subtract": [
                {"$arrayElemAt": ["$$pair", 0]},
                {"$arrayElemAt": ["$$pair", 1]}
              ]
            }
          }
        }
      }
    }
  },
  {
    "$match": {
      "score": {"$lt": 5}
    }
  }
])
Example here
Edit: $firstN is new in version 5.2. On older versions you can use $slice instead.

Projecting data after doing a $facet

After doing a $facet I receive this output:
[
{
"confirmed": [
{
"confirmed": 100
}
],
"denied": [
{
"denied": 50
}
],
"pending": [
{
"pending": 20
}
]
}
]
how can I project it into something like this?
[
{
category: "confirmed", count: 100,
category: "denied", count: 50,
category: "pending", count: 20
}
]
I need the $facet stage because, to extract those numbers, I have to apply several $match stages to the same data. I don't know if there is a better option.
Thank you!
What you ask is not a valid format. This is an object with duplicate keys. You may want:
[{"confirmed": 100, "denied": 50, "pending": 20}]
or
[
{category: "confirmed", count: 100},
{category: "denied", count: 50},
{category: "pending", count: 20}
]
which are both valid options
I guess you want the second option. If you want the generic solution, one option is:
// Generic reshaping of a $facet result: every top-level key becomes one
// {category, count} document, whatever the keys are called.
db.collection.aggregate([
  // {confirmed: [...], denied: [...]} -> [{k: "confirmed", v: [...]}, ...]
  {$project: {res: {$objectToArray: "$$ROOT"}}},
  {$project: {
    res: {$map: {
      input: "$res",
      in: {
        category: "$$this.k",
        // Each facet value is an array whose first element is a one-field
        // object; expand that object to k/v pairs and take the first value.
        count: {$let: {
          vars: {pairs: {$objectToArray: {$first: "$$this.v"}}},
          in: {$first: "$$pairs.v"}
        }}
      }
    }}
  }},
  // One output document per category.
  {$unwind: "$res"},
  {$replaceRoot: {newRoot: "$res"}}
])
See how it works on the playground example - generic
If you want the literal option, just use:
// Literal reshaping of the $facet result: the three category names are
// spelled out and each count is read from the first element of its facet array.
db.collection.aggregate([
  {$project: {res: [
    {category: "confirmed", count: {$arrayElemAt: ["$confirmed.confirmed", 0]}},
    {category: "denied", count: {$arrayElemAt: ["$denied.denied", 0]}},
    {category: "pending", count: {$arrayElemAt: ["$pending.pending", 0]}}
  ]}},
  // One output document per category.
  {$unwind: "$res"},
  {$replaceRoot: {newRoot: "$res"}}
])
See how it works on the playground example - literal

Finding ranges of continuous values

I have the following Mongo collection:
[
{
"key": 1,
"user": "A",
"comment": "commentA1"
},
{
"key": 2,
"user": "A",
"comment": "commentA2"
},
{
"key": 5,
"user": "A",
"comment": "commentA5"
},
{
"key": 2,
"user": "B",
"comment": "commentB2"
},
{
"key": 3,
"user": "B",
"comment": "commentB3"
},
{
"key": 6,
"user": "B",
"comment": "commentB6"
}
]
and I need to find the first continuous keys, with no gaps, per user.
So, for user A I should get the first 2 documents, and for user B the first two also.
The collection might contain more than 2M documents, so the query should work fast.
I have found SQL solutions for this problem (http://www.silota.com/docs/recipes/sql-gap-analysis-missing-values-sequence.html in section number 3), but I am looking for a Mongo solution.
How can I do it in Mongo 4.0 (DocumentDB) ?
EDIT: according to further elaboration on the comments,
One option is:
// For each user, return the documents that form the first run of consecutive keys.
db.collection.aggregate([
  {$sort: {key: 1}},
  // Per user: the documents in key order, plus a "shadow" array of key + 1.
  {$group: {
    _id: "$user",
    data: {$push: {key: "$key", comment: "$comment"}},
    shadow: {$push: {$add: ["$key", 1]}}
  }},
  // Keep only the shadow values that exist as real keys, i.e. the keys that
  // have a direct predecessor.
  {$project: {
    data: 1,
    shadow: {$filter: {input: "$shadow", cond: {$in: ["$$this", "$data.key"]}}}
  }},
  // The first run starts one below the first key that has a predecessor.
  // $arrayElemAt is used instead of the $first array operator because the
  // question targets MongoDB 4.0 and $first on arrays only exists from 4.4.
  {$project: {data: 1, shadow: 1, firstItem: {$subtract: [{$arrayElemAt: ["$shadow", 0]}, 1]}}},
  {$project: {data: 1, firstItem: 1, shadow: {$concatArrays: [["$firstItem"], "$shadow"]}}},
  // Keep the shadow entries whose value equals firstItem + position, i.e. the
  // entries that continue the run without a gap.
  {$project: {
    data: 1,
    shadow: {$reduce: {
      input: {$range: [0, {$size: "$shadow"}]},
      initialValue: [],
      in: {
        $concatArrays: [
          "$$value",
          {$cond: [
            {$eq: [
              {$arrayElemAt: ["$shadow", "$$this"]},
              {$add: ["$$this", "$firstItem"]}
            ]},
            [{$arrayElemAt: ["$shadow", "$$this"]}],
            []
          ]}
        ]
      }
    }}
  }},
  // Keep only the documents whose key belongs to that run.
  {$project: {data: {$filter: {input: "$data", cond: {$in: ["$$this.key", "$shadow"]}}}}},
  {$unwind: "$data"},
  {$project: {comment: "$data.comment", key: "$data.key"}}
])
See how it works on the playground example

MongoDB Aggregation with Sum and Multiple Group Results

Let's say I have these collections members and positions
[
{
"church":"60dbb265a75a610d90b45c6b", "parentId":"60dbb265a75a610d90b45c6b", name: "Jonah John", status: 1, birth: "1983-01-01", position: "60f56f59-08be-49ec-814a-2a421f21bc08"
},
{
"church":"60dbb265a75a610d90b45c6b", "parentId":"60dbb265a75a610d90b45c6b", name: "March John", status: 1, birth: "1981-01-23", position: "60f56f59-08be-49ec-814a-2a421f21bc08"
},
{
"church":"60dbb265a75a610d90b45c6b", "parentId":"60dbb265a75a610d90b45c6b",name: "Jessy John", status: 0, birth: "1984-08-01", position: "e5bba609-082c-435a-94e3-0997fd229851"
}
]
[
{_id: "60f56f59-08be-49ec-814a-2a421f21bc08", name: "Receptionist"},
{_id: "5c78ba5a-3e6c-4d74-8d4a-fa23d02b8003", name: "Curtain"},
{_id: "e5bba609-082c-435a-94e3-0997fd229851", name: "Doorman"}
]
I want to aggregate in a way I can get:
inactiveMembers
activeMembers
totalMembers
totalPositionsOcuppied
And two arrays with:
positionsOcuppied {name, quantity}
birthdays {month, quantity}.
I need an output like this:
{
"_id": {
"church":"60dbb265a75a610d90b45c6b",
"parentId":"60dbb265a75a610d90b45c6b"
},
"inactiveMembers":1,
"activeMembers":2,
"totalMembers":3,
"birthdays": [
{january:2}, {august:1}
],
"positionsOcuppied": [
{Doorman: 1}, {Receptionist:2}
],
"totalPositionsOcuppied": 3
}
How can I do that?
PS.: Very sorry for unclear values...
Update:
$addFields with birthMonth string
$lookup to add positions
$facet to $group by birthdays, positionsOcuppied, and all docs together as other
$map to format birthdays and positionsOcuppied
Format the answer
// Builds one summary document: member totals, members per birth month and
// members per position name, keyed by church/parentId.
db.people.aggregate([
// Add birthMonth: the short month name looked up by the month number of
// `birth`. Index 0 of the lookup array is an empty placeholder, so month 1
// maps to "Jan".
{$addFields: {
birthMonth: {
$arrayElemAt: [
["","Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"],
{$month: {$toDate: "$birth"}}
]
}
}
},
// Replace the `position` id with the array of matching `positions` documents.
{$lookup: {from: "positions", localField: "position", foreignField: "_id",
as: "position"}},
// Run three groupings over the same input.
{$facet: {
// Members per birth month.
birthdays: [{$group: {_id: "$birthMonth", count: {$sum: 1}}}],
// Members per position name (name of the first looked-up position).
positionsOcuppied: [{$group: {_id: {$first: "$position.name"}, count: {$sum: 1}}}],
// Overall totals in a single group.
other: [
{$group: {_id: 0,
// Summing status counts the active members, assuming status is 1 for active
// and 0 for inactive as in the sample data.
activeMembers: {$sum: "$status"},
totalMembers: {$sum: 1},
church: {$first: "$church"},
parentId: {$first: "$parentId"},
// Total number of looked-up position documents across all members.
totalPositionsOcuppied: {$sum: {$size: "$position"}}
}
}
]
}
},
// Wrap every {_id, count} as a single-element [{k, v}] array, the shape
// $arrayToObject expects, and unwrap the single `other` document.
{$set: {
birthdays: {
$map: {input: "$birthdays", in: [{k: "$$this._id", v: "$$this.count"}]}
},
positionsOcuppied: {
$map: {input: "$positionsOcuppied", in: [{k: "$$this._id", v: "$$this.count"}]}
},
other: {$first: "$other"}
}
},
// Move everything under `other`: turn each [{k, v}] into {<k>: <v>}, derive
// inactiveMembers, build the compound _id and drop the fields that were only
// needed along the way.
{$set: {
"other.birthdays": {
$map: {input: "$birthdays", in: {$arrayToObject: "$$this"}}
},
"other.positionsOcuppied": {
$map: {input: "$positionsOcuppied", in: {$arrayToObject: "$$this"}}
},
"other.inactiveMembers": {
$subtract: ["$other.totalMembers", "$other.activeMembers"]
},
"other._id": {church: "$other.church", parentId: "$other.parentId"},
birthdays: "$$REMOVE",
"other.church": "$$REMOVE",
"other.parentId": "$$REMOVE",
positionsOcuppied: "$$REMOVE"
}
},
// Promote `other` to be the result document.
{$replaceRoot: {newRoot: "$other"}}
])
See how it works on the playground example

how to get last character from a string in mongodb?

Data:
{code: "XXXXXXXX1", total: 400},
{code: "YYYYY2", total: 500}
{code: "ZZZZZZ3", total: 100}
{code: "AAA5", total: 200}
I want to create an aggregate function to group the data above by its last character in the code field. code field is a string and can be varied in length. I only want to get its last index/number. Something like:
// Sketch of the desired pipeline: group the totals by the last character of `code`.
db.transactions.aggregate([
  // $getMyLastCharInMyCode is a placeholder for the operator being asked
  // about; it is not a real MongoDB operator.
  {$project: {
    last_index: {$getMyLastCharInMyCode: "$code"},
    total: 1
  }},
  // Accumulators are direct fields of $group, not a nested object.
  {$group: {_id: "$last_index", total: {$sum: "$total"}}}
])
I searched the internet and mongodb manuals, it seems impossible. Any ideas? Thank you
There you go:
// Add last_index: the last character of `code`.
db.transactions.aggregate([
  {$addFields: {
    // $strLenCP counts code points, so the substring is taken with $substrCP
    // as well; the byte-based $substr would use the wrong offset for
    // multi-byte characters. $max keeps the start index at 0 for an empty
    // string, because $substrCP requires a non-negative index.
    last_index: {
      $substrCP: ["$code", {$max: [0, {$subtract: [{$strLenCP: "$code"}, 1]}]}, 1]
    }
  }}
])
// Sum `total` per last character of `code`.
db.transactions.aggregate([
  {"$project": {
    total: 1,
    code: 1,
    // $strLenCP counts code points, so the substring is taken with $substrCP
    // as well; the byte-based $substr would use the wrong offset for
    // multi-byte characters. The length is an explicit 1 because $substrCP
    // requires a non-negative count, and $max keeps the start index at 0 for
    // an empty string.
    last_index: {
      $substrCP: ["$code", {$max: [0, {$subtract: [{"$strLenCP": "$code"}, 1]}]}, 1]
    }
  }},
  {"$group": {
    "_id": "$last_index",
    "total": {"$sum": "$total"}
  }}
])