Mongo aggregation: partitioning values into groups (by partition) - mongodb

Using this approach, I can group a set of event documents by time. This solution returns the same input documents but with a partition number added.
How can I do the same thing but return the partitions instead? An example output document would be:
{
partition: 0,
startDate: ISODate("1900-04-12T18:30:00.000Z"),
endDate: ISODate("2019-04-12T18:30:00.000Z"),
numEvents: 27
}

Let's say this is the current output of your aggregation:
{
"_id" : null,
"datesWithPartitions" : [
{
"date" : ISODate("2019-04-12T18:30:00Z"),
"partition" : 0
},
{
"date" : ISODate("2019-04-12T20:00:00Z"),
"partition" : 1
},
{
"date" : ISODate("2019-04-12T20:10:00Z"),
"partition" : 1
},
{
"date" : ISODate("2019-04-12T21:00:00Z"),
"partition" : 2
},
{
"date" : ISODate("2019-04-12T21:15:00Z"),
"partition" : 2
},
{
"date" : ISODate("2019-04-12T21:45:00Z"),
"partition" : 3
},
{
"date" : ISODate("2019-04-12T23:00:00Z"),
"partition" : 4
}
]
}
To get the data in a format you need you need to append following aggregation steps:
db.col.aggregate([
{
$unwind: "$datesWithPartitions"
},
{
$group: {
_id: "$datesWithPartitions.partition",
numEvents: { $sum: 1 },
startDate: { $min: "$datesWithPartitions.date" },
endDate: { $max: "$datesWithPartitions.date" }
}
},
{
$project: {
_id: 0,
partition: "$_id",
startDate: 1,
endDate: 1,
numEvents: 1
}
}
])
$unwind will return single date per document and then you can apply $group with $min and $max to get partition boundaries and $sum to count partition elements

Related

I need limited nested array in mongodb document

I have a document like
{
"deviceId" : "1106",
"orgId" : "5ffe9fe1c9e77c0006f0aad3",
"values" : [
{
"paramVal" : 105.0,
"dateTime" : ISODate("2021-05-05T09:18:08.000Z")
},
{
"paramVal" : 110.0,
"dateTime" : ISODate("2021-05-05T09:18:08.000Z")
},
{
"paramVal" : 115.0,
"dateTime" : ISODate("2021-05-05T10:18:08.000Z")
},
{
"paramVal" : 125.0,
"dateTime" : ISODate("2021-05-05T11:18:08.000Z")
},
{
"paramVal" : 135.0,
"dateTime" : ISODate("2021-05-05T12:18:08.000Z")
}
]
}
Now I need to filter a document which I can do easily with match or find but in that document the subarray i.e. values should have latest 2 values because in future the count can be more than 100.
the output should be like
{
"deviceId" : "1106",
"orgId" : "5ffe9fe1c9e77c0006f0aad3",
"values" : [
{
"paramVal" : 125.0,
"dateTime" : ISODate("2021-05-05T11:18:08.000Z")
},
{
"paramVal" : 135.0,
"dateTime" : ISODate("2021-05-05T12:18:08.000Z")
}
]
}
Try $slice operator, to select number of elements, pass negative value to select documents from below/last elements,
db.collection.aggregate([
{ $set: { values: { $slice: ["$values", -2] } } }
])
Playground
I need for the array values in sorted order by date
There is no straight way to do this, check the below aggregation query, but it will cause the performance issues, i would suggest to change you schema structure to manage this data order by date,
$unwind deconstruct values array
$sort by dateTime in descending order
$group by _id and reconstruct values array and return other required fields
$slice to select number of elements, pass negative value to select documents from below/last elements
db.collection.aggregate([
{ $unwind: "$values" },
{ $sort: { "values.dateTime": -1 } },
{
$group: {
_id: "$_id",
deviceId: { $first: "$deviceId" },
orgId: { $first: "$orgId" },
values: { $push: "$values" }
}
},
{ $set: { values: { $slice: ["$values", 2] } } }
])
Playground

Check if subdocument in range complies with a condition

I'm working on a mongoDB query.
I have several documents which I query with following results:
{
"_id" : 1000.0,
"date" : ISODate("2018-05-25T00:20:00.000Z"),
"value" : true
}
{
"_id" : 1000.0,
"date" : ISODate("2018-05-25T00:26:00.000Z"),
"value" : false
}
{
"_id" : 1000.0,
"date" : ISODate("2018-05-25T00:30:00.000Z"),
"value" : false
}
The original documents are filtered so that I get only document within the last 15 minutes before now and there is no way of knowing how many entries are in that time range.
I need to expand my existing query so that it returns a status based on the "value". If there are no true I need a status 0, if there is at least 1 but not only true I need a status 1, and if there are only true I need a status 2.
For example:
{
"_id" : 1000,
"status" : 1
},
{
"_id" : 1001,
"status" : 2
}
Is there a way of accomplishing this using mongoDB? Or would it be better/easier to do it on java side? Note that there are several _id in the database.
You can gather all values from each group into one array (using $group and $push) and then use $switch to apply your logic. To determine whether array contains any true value or all values are true you can use $anyElementTrue and $allElementsTrue:
db.col.aggregate([
{
$group: {
_id: "$_id",
values: { $push: "$value" }
}
},
{$unwind:"$values"},
{
$project: {
_id: 1,
status: {
$switch: {
branches: [
{ case: { $allElementsTrue: "$values" }, then: 2 },
{ case: { $anyElementTrue: "$values" }, then: 1 },
],
default: 0
}
}
}
}
])

MongoDB nested group by query

I want to count correct, incorrect and unattempted question count. I am getting zero values.
Query -
db.studentreports.aggregate([
{ $match: { 'groupId': 314 } },
{ $unwind: '$questions' },
{ $group:
{
_id: {
dateTimeStamp: '$dateTimeStamp',
customerId: '$customerId'
},
questions : { $push: '$questions' },
unttempted : { $sum : { $eq: ['$questions.status',0]}},
correct : { $sum : { $eq: ['$questions.status',1]}},
incorrect : { $sum : { $eq: ['$questions.status',2]}},
Total: { $sum: 1 }
}
}
])
Schema structure -
{
"_id" : ObjectId("59fb46ed560e1a2fd5b6fbf4"),
"customerId" : 2863318,
"groupId" : 309,
"questions" : [
{
"questionId" : 567,
"status" : 0,
"_id" : ObjectId("59fb46ee560e1a2fd5b700a4"),
},
{
"questionId" : 711,
"status" : 0,
"_id" : ObjectId("59fb46ee560e1a2fd5b700a3")
},
....
values unttempted, correct and incorrect are getting wrong -
"unttempted" : 0,
"correct" : 0,
"incorrect" : 0,
"Total" : 7558.0
Group by is required based on datetime and customerId.
Can some one correct query ?
Thanks.
You want to sum these fields only if a certain condition is met.
You just have to rewrite your group statement like this:
{ $group:
{
_id: {
dateTimeStamp: '$dateTimeStamp',
customerId: '$customerId'
},
questions : { $push: '$questions' },
unttempted : { $sum : {$cond:[{ $eq: ['$questions.status',0]}, 1, 0]}},
correct : { $sum : {$cond:[{ $eq: ['$questions.status',1]}, 1, 0]}},
incorrect : { $sum : {$cond:[{ $eq: ['$questions.status',2]}, 1, 0]}},
Total: { $sum: 1 }
}
}
Check out the documentation $eq. $eq compares and returns true or false. So then your $sum cannot do anything with that result

Mongo aggregation framework: group users by age

I have a user base stored in mongo. Users may record their date of birth.
I need to run a report aggregating users by age.
I now have a pipeline that groups users by year of birth. However, that is not precise enough because most people are not born on January 1st; so even if they are born in, say, 1970, they may well not be 43 yet.
db.Users.aggregate([
{ $match : { "DateOfBirth" : { $exists : true} } },
{ $project : {"YearOfBirth" : {$year : "$DateOfBirth"} } },
{ $group : { _id : "$YearOfBirth", Total : { $sum : 1} } },
{ $sort : { "Total" : -1 } }
])
Do you know if it's possible to perform some kind of arithmetic within the aggregation framework to exactly calculate the age of a user? Or is this possible with MapReduce only?
It seems like the whole thing is possible with the new Mongo 2.4 version just released, supporting additional Date operations (namely the "$subtract").
Here's how I did it:
db.Users.aggregate([
{ $match : { "DateOfBirth" : { $exists : true} } },
{ $project : {"ageInMillis" : {$subtract : [new Date(), "$DateOfBirth"] } } },
{ $project : {"age" : {$divide : ["$ageInMillis", 31558464000] }}},
// take the floor of the previous number:
{ $project : {"age" : {$subtract : ["$age", {$mod : ["$age",1]}]}}},
{ $group : { _id : "$age", Total : { $sum : 1} } },
{ $sort : { "Total" : -1 } }
])
There are not enough dateTime operators and math operators to project out the date. But you might be able to create age ranges by composing a dynamic query:
Define your date ranges as cut-off dates as
dt18 = today - 18
dt25 = today - 25
...
dt65 = today - 65
Then do nested conditionals, where you progressively use the cut off dates as age group markers, like so:
db.folks.save({ "_id" : 1, "bd" : ISODate("2000-02-03T00:00:00Z") });
db.folks.save({ "_id" : 2, "bd" : ISODate("2010-06-07T00:00:00Z") });
db.folks.save({ "_id" : 3, "bd" : ISODate("1990-10-20T00:00:00Z") });
db.folks.save({ "_id" : 4, "bd" : ISODate("1964-09-23T00:00:00Z") });
db.folks.aggregate(
{
$project: {
ageGroup: {
$cond: [{
$gt: ["$bd",
ISODate("1995-03-19")]
},
"age0_18",
{
$cond: [{
$gt: ["$bd",
ISODate("1988-03-19")]
},
"age18_25",
"age25_plus"]
}]
}
}
},
{
$group: {
_id: "$ageGroup",
count: {
$sum: 1
}
}
})

MongoDB Aggregation Function Returning Undefined

I'm attempting to use use the new MongoDB aggregation features to tally some statistics by date. Below is a sample of the documents that I am working with, my attempted code and desired result. The aggregation function retuns "UNDEFINED". Can someone tell me why that is? And secondly, I want my aggregation function to group results by date in mm-dd-yyyy format. However as it is currently written I think the code is going to execute the aggregation by the full ISO date. Can someone please tell me how to fix this?
DOCUMENT EXAMPLE
{
user: "2A8761E4-C13A-470E-A759-91432D61B6AF-25982-0000352D853511AF",
language: "English",
imageFileName: "F7A5ED9-D43C-4671-A5C6-F06C7E41F902-7758-000008371FB5B834",
audioFileName: "F6D5727D-9377-4092-A28A-AA900F02653D-7758-0000083749066CF2",
date: ISODate("2012-10-22T02:43:52Z"),
correct: "1",
_id: ObjectId("5084b2e8179c41cc15000001")
}
AGGREGATION FUNCTION
var getUserStats = function(user, language, callback) {
var guessCollection = db.collection('Guesses');
guessCollection.aggregate(
{ $match: {
user: user,
language: language,
}},
{ $sort: {
date: 1
}},
{ $project : {
user : 1,
language : 1,
date : 1,
correct : 1,
incorrect : 1,
} },
{ $unwind : "$language" },
{ $group : {
_id : "$date",
correct : { $sum : "$correct" },
incorrect : { $sum : "$incorrect" }
} }
, function(err, result){
console.log(result);
callback(result);
});
DESIRED RESULT
{
"result" : [
//...snip...
{
"_id" : "2A8761E4-C13A-470E-A759-91432D61B6AF-25982-0000352D853511AF",
"correct" : 32,
"incorrect" : 17,
"date" : 2012-10-22
},
{
"_id" : "2A8761E4-C13A-470E-A759-91432D61B6AF-25982-0000352D853511AF",
"correct" : 16,
"incorrect" : 7,
"date" : 2012-10-23
}
],
"Ok" : 1
}
Regarding your first question about it returning undefined, there are two problems:
You are using the $unwind operator on a field ($language) that isn't an array.
You are using the $sum operator on a string field ($correct); that's only supported for number fields.
For your second question about grouping on just the date, you need to project the date components you want to group on and then use those components in your $group operator's _id value:
For example:
test.aggregate(
{ $match: {
user: user,
language: language
}},
{ $sort: {
date: 1
}},
{ $project : {
user : 1,
language : 1,
year : { $year: '$date' },
month : { $month: '$date' },
day : { $dayOfMonth: '$date'},
correct : 1,
incorrect : 1
}},
{ $group : {
_id : { year: "$year", month: "$month", day: "$day" },
correct : { $sum : "$correct" },
incorrect : { $sum : "$incorrect" }
}},
function(err, result){
console.log(result);
}
);
Produces output of:
[ { _id: { year: 2012, month: 10, day: 22 },
correct: 0,
incorrect: 0 } ]
You can assemble that into '2012-10-22' in code from there.