MongoDB - count items using addToSet - mongodb

I'm working with a collection called "submissions" that includes mission data for a game. Every time you submit your answer to the question/mission a document is saved to the submissions collection.
A typical document looks like this:
{
"_id" : ObjectId("5b0c99dffea598002fdb6ec9"),
"status" : "Accepted",
"missionAcceptanceDate" : NumberInt(1527526261),
"submissionDate" : NumberInt(1527552495),
"location" : "",
"approvalDate" : null,
"mission_id" : ObjectId("5b0c5b10fea598002fdb6ec6"),
"user_id" : ObjectId("5b0c99d0fea598002fdb6ec7")
}
I need to build a query that can determine the following:
Counts the total submissions per grouping by status between a date range,
Counts the Total number of submissions by mission_id per each grouping.
Below is what I have so far ("cutting out some info..."):
QUERY:
db.submissions.aggregate(
{ $match: {
"submissionDate": {
"$gte": NumberInt(1527901260), //Fri, 1 Jun 2018 18:00:01 PDT
"$lte": NumberInt(1530831600) //Thu, 5 Jul 2018 16:00:00 PDT
}
}
},
{$project:
{_id:1,
status: 1,
missionAcceptanceDate: 1,
submissionDate: 1,
approvalDate: 1,
user_id: 1,
mission_id: 1
}
},
{$group: {
_id: { status: "$status" },
statusTotal: { $sum: 1 },
mission_id: { $addToSet: "$mission_id" },
}
}
)
RESULT:
{
"_id" : {
"status" : "Rejected"
},
"statusTotal" : 16.0,
"mission_id" : [
ObjectId("5b0edbfafea598002fdb6f3a"),
ObjectId("5b0f0131fea598002fdb6f43"),
ObjectId("5b0eded6fea598002fdb6f3d"),
...
]
}
{
"_id" : {
"status" : "Approved"
},
"statusTotal" : 592.0,
"mission_id" : [
ObjectId("5b391cf4e177c700308dd0ef"),
ObjectId("5b36a0d172b5240030d304be"),
ObjectId("5b3558276a0f950030db1732"),
...
]
}
This is great, but I also need to know the total number of times each "mission_id" was "Rejected" or "Approved". The desired outcome should look something like the following:
{
"_id" : {
"status" : "Rejected"
},
"statusTotal" : 16.0,
"mission_id" : [
...
{ mission_id: ObjectId("5b0edbfafea598002fdb6f3a"), count: 3 },
{ mission_id: ObjectId("5b0f0131fea598002fdb6f43"), count: 5},
{ mission_id: ObjectId("5b0eded6fea598002fdb6f3d"), count: 2 }
...
]
}
{
"_id" : {
"status" : "Approved"
},
"statusTotal" : 592.0,
"missionData" : [
{ mission_id: ObjectId("5b391cf4e177c700308dd0ef"), count: 23 },
{ mission_id: ObjectId("5b36a0d172b5240030d304be"), count: 45},
{ mission_id: ObjectId("5b3558276a0f950030db1732"), count: 15 }
...
]
}
I tried using $size, $sum, $count, and even a second $group with {$sum:1} for each mission_id, but I just can't figure it out. Any help with this would be much appreciated. Thank you in advance!

You need an extra grouping before your group if I understand your question correctly.
Something like
db.submissions.aggregate([
{"$match":{"submissionDate":{"$gte":NumberInt(1527901260),"$lte":NumberInt(1530831600)}}},
{"$group":{
"_id":{"status":"$status","mission_id":"$mission_id"},
"missionTotal":{"$sum":1}
}},
{"$group":{
"_id":"$_id.status",
"statusTotal":{"$sum":"$missionTotal"},
"mission_id":{"$push":{"mission_id":"$_id.mission_id","count":"$missionTotal"}}
}}
])

Related

Mongo $group and $count, and then $sort the result

I have a simple table with ranked users...
User:
{
"_id" : "aaa",
"rank" : 10
},
{
"_id" : "bbb",
"rank" : 30
},
{
"_id" : "ccc",
"rank" : 20
},
{
"_id" : "ddd",
"rank" : 30
},
{
"_id" : "eee",
"rank" : 30
},
{
"_id" : "fff",
"rank" : 10
}
And I would like to count how many users have each rank, and then sort the ranks from highest to lowest count.
So I can get this result:
Result:
{
"rank" : 30,
"count": 3
},
{
"rank" : 10,
"count": 2
},
{
"rank" : 20,
"count": 1
}
I tried different things but can't seem to get the correct output:
db.getCollection("user").aggregate([
{
"$group": {
"_id": {
"rank": "$rank"
},
"count": { "$sum": 1 }
},
"$sort": {
"count" : -1
}
])
I hope this is possible to do.
You can count and then sort them by aggregation in mongodb
db.getCollection('users').aggregate(
[
{
$group:
{
_id: "$rank",
count: { $sum: 1 }
}
},
{ $sort : { count : -1} }
]
)
Working example
https://mongoplayground.net/p/aM3Ci3GACjp
You don't need to add additional group or count stages when you can do it in one go -
db.getCollection("user").aggregate([
{
$sortByCount: "$rank"
}
])

How to subtract time series elements to get the difference to the date before?

I am trying to build a dashboard chart in Mongo-Atlas.
The table should show the date on the x-axis and the _id on the y-axis.
The values should be the difference in count from the previous date.
I have a collection with data points such as:
_id: "someName"
timestamp: 2019-09-05T06:24:24.689+00:00
count: 50
_id: "someName"
timestamp: 2019-09-04T06:24:24.689+00:00
count: 40
...
The goal is to get the difference between each count and the count of the previous data point that has the same name.
_id: "someName"
timestamp: 2019-09-05T06:24:24.689+00:00
count: 50
difference: 10
_id: "someName"
timestamp: 2019-09-04T06:24:24.689+00:00
count: 40
difference: 17
...
That way I could make a table listing the differences.
So far I have created an aggregation pipeline:
[
{$sort: {
"timestamp": -1
}},
{$group: {
_id: "$_id",
count: {
$push: { count: "$count", timestamp: "$timestamp" }
}
}},
{$project: {
_id: "$_id",
count: "$count",
countBefore: { $slice: [ "$count", 1, { $size: "$count" } ] }
}}
]
I was hoping to subtract count and countBefore so that I get an array with the data points and the differences...
So I tried to follow with:
{$project: {
countDifference: {
$map: {
input: "$countBefore",
as: "before",
in: {
$subtract: ["$$before.count", "$count.count"]
/*"$count.count" seems to be the problem, since an integer works*/
}
}
}
}
}
Mongo Atlas only shows "An unknown error occurred"
I would be glad for some advice :)
The following query can get us the expected output:
db.collection.aggregate([
{
$sort:{
"timestamp":1
}
},
{
$group:{
"_id":"$id",
"counts":{
$push:"$count"
}
}
},
{
$project:{
"differences":{
$reduce:{
"input":"$counts",
"initialValue":{
"values":[],
"lastValue":0
},
"in":{
"values":{
$concatArrays:[
"$$value.values",
[
{
$subtract:["$$this","$$value.lastValue"]
}
]
]
},
"lastValue":"$$this"
}
}
}
}
},
{
$project:{
"_id":0,
"id":"$_id",
"plots":"$differences.values"
}
}
]).pretty()
Data Set:
{
"_id" : ObjectId("5d724550ef5e6630fde5b71e"),
"id" : "someName",
"timestamp" : "2019-09-05T06:24:24.689+00:00",
"count" : 50
}
{
"_id" : ObjectId("5d724550ef5e6630fde5b71f"),
"id" : "someName",
"timestamp" : "2019-09-04T06:24:24.689+00:00",
"count" : 40
}
{
"_id" : ObjectId("5d724796ef5e6630fde5b720"),
"id" : "someName",
"timestamp" : "2019-09-06T06:24:24.689+00:00",
"count" : 61
}
{
"_id" : ObjectId("5d724796ef5e6630fde5b721"),
"id" : "someName",
"timestamp" : "2019-09-07T06:24:24.689+00:00",
"count" : 72
}
{
"_id" : ObjectId("5d724796ef5e6630fde5b722"),
"id" : "someName",
"timestamp" : "2019-09-08T06:24:24.689+00:00",
"count" : 93
}
{
"_id" : ObjectId("5d724796ef5e6630fde5b723"),
"id" : "someName",
"timestamp" : "2019-09-09T06:24:24.689+00:00",
"count" : 100
}
Output:
{ "id" : "someName", "plots" : [ 40, 10, 11, 11, 21, 7 ] }
Explanation: We push the count values for each id into a counts array and then apply a $reduce operation to it, building a new array in which each element holds the difference between the current and the previous value of the counts array. For the very first value, the previous value is taken as zero.

Multiple conditional sums in mongodb aggregation

I'm trying to return the total of requests by type based on their status:
If there is no status set, the request should be added to suggested
If the status is ordered, the request should be added to ordered
If the status is arrived, the request should be added to arrived
caseRequest.aggregate([{
$group: {
_id: "$product",
suggested: {
$sum: {
$cond: [{
$ifNull: ["$status", true]
},
1, 0
]}
},
ordered: {
$sum: {
$cond: [{
$eq: ["$status", "ordered"]
},
1, 0
]
}
},
arrived: {
$sum: {
$cond: [{
$eq: ["$status", "arrived"]
},
1, 0
]
}
}
}
}
But for some reason it doesn't find any requests with status ordered or arrived. If in the database I have 48 requests, 45 of them without status, 2 with ordered and 1 with arrived, it returns:
[
{
_id: "xxx",
suggested: 48,
ordered: 0,
arrived: 0,
},
...
]
Try this approach,
Return the total number of requests by type based on their status
Now the simplest way to get the count of different status is to use aggregate pipeline with $group on the status field
db.stackoverflow.aggregate([{ $group: {_id: "$status", count: {$sum:1}} }])
We will be getting a result similar to this
{ "_id" : "", "count" : 2 }
{ "_id" : "arrived", "count" : 3 }
{ "_id" : "ordered", "count" : 4 }
The schema which is used to retrieve these records is very simple so that it will be easier to understand. The schema will have a parameter on the top level of the document and the value of status can be "ordered", "arrived" or empty
Schema
{ "_id" : ObjectId("5798c348d345404e7f9e0ced"), "status" : "ordered" }
The collection is populated with 9 records, with status as ordered, arrived and empty
db.stackoverflow.find()
{ "_id" : ObjectId("5798c348d345404e7f9e0ced"), "status" : "ordered" }
{ "_id" : ObjectId("5798c349d345404e7f9e0cee"), "status" : "ordered" }
{ "_id" : ObjectId("5798c34ad345404e7f9e0cef"), "status" : "ordered" }
{ "_id" : ObjectId("5798c356d345404e7f9e0cf0"), "status" : "arrived" }
{ "_id" : ObjectId("5798c357d345404e7f9e0cf1"), "status" : "arrived" }
{ "_id" : ObjectId("5798c358d345404e7f9e0cf2"), "status" : "arrived" }
{ "_id" : ObjectId("5798c35ad345404e7f9e0cf3"), "status" : "ordered" }
{ "_id" : ObjectId("5798c361d345404e7f9e0cf4"), "status" : "" }
{ "_id" : ObjectId("5798c362d345404e7f9e0cf5"), "status" : "" }
db.stackoverflow.count()
9
Hope it Helps!!

$project to contain the latest date from the group

I have a collection that is filled with messages, and I want to get a group of how many messages there are per room.
Gets messages within the last hour
Where gameId = 5550d868c5242fb3299a2604
Grouped by $room
Sorted by created
When I add the projection, the count works, but _id becomes null, and created doesn't even return.
What I am looking for is _id to contain $room, created to contain the newest date, and count to be the count of items.
db.chatMessages.aggregate([
{
$match: {
created: {
$gte: new Date((new Date()) - (1000 * 60 * 60))
},
gameId: ObjectId("5550d868c5242fb3299a2604")
}
},
{
$project: {created: 1}
},
{
$group: {
_id: "$room",
count: {$sum: 1}
}
},
{
$sort: {created: -1}
}
])
Here is some example data:
{
"gameId" : ObjectId("5550d868c5242fb3299a2604"),
"message" : "For real?",
"room" : "Main",
"name" : "Hello",
"created" : ISODate("2015-05-24T17:49:43.868Z"),
"_id" : ObjectId("55620f376aa13616759f4f06")
}
{
"gameId" : ObjectId("5550d868c5242fb3299a2604"),
"message" : "Yeah for reals...",
"room" : "Main",
"name" : "Jim",
"created" : ISODate("2015-05-24T17:49:59.135Z"),
"_id" : ObjectId("55620f476aa13616759f4f07")
}
Here is the returned output:
{
"result" : [
{
"_id" : null,
"count" : 4
}
],
"ok" : 1
}
Here is what I am looking for:
{
"result" : [
{
"_id" : "Main",
"count" : 4,
"created" : ISODate("2015-05-24T17:37:19.331Z")
}
],
"ok" : 1
}
So what is causing this to break when I add the $project?
I was able to get it by adding created to the group and doing $max on the column like this:
db.chatMessages.aggregate([
{
$project: {room: 1, created: 1}
},
{
$match: {
created: {
$gte: new Date((new Date()) - (1000 * 60 * 60 * 20000))
},
gameId: ObjectId("5550d868c5242fb3299a2604")
}
},
{
$group: {
_id: "$room",
count: {$sum: 1},
created: {$max: "$created"}
}
},
{
$sort: {created: -1}
}
])

MongoDB - Aggregate max/min/average for multiple variables at once

I am logging data into MongoDB in the following format:
{ "_id" : ObjectId("54f2393f80b72b00079d1a53"), "outT" : 10.88, "inT3" : 22.3, "light" : 336, "humidity" : 41.4, "pressure" : 990.31, "inT1" : 22.81, "logtime" : ISODate("2015-02-28T21:55:11.838Z"), "inT2" : 21.5 }
{ "_id" : ObjectId("54f2394580b72b00079d1a54"), "outT" : 10.88, "inT3" : 22.3, "light" : 338, "humidity" : 41.4, "pressure" : 990.43, "inT1" : 22.75, "logtime" : ISODate("2015-02-28T21:55:17.690Z"), "inT2" : 311.72 }
...
As you can see there is a single time element and multiple readings logged. I want to aggregate across all of the readings to provide a max min and average for each variable grouped by hour of day. I have managed to do this for a single variable using the following aggregation script:
db.logs.aggregate(
[
{
$match: {
logtime: {
$gte: ISODate("2015-03-01T00:00:00.000Z"),
$lt: ISODate("2015-03-03T00:00:00.000Z")
}
}
},
{
$project: {_id: 0, logtime: 1, outT: 1}
},
{
$group: {
_id: {
day: {$dayOfYear: "$logtime"},
hour: {$hour: "$logtime"}
},
average: {$avg: "$outT"},
max: {$max: "$outT"},
min:{$min: "$outT"}
}
}
]
)
which produces:
{ "_id" : { "day" : 61, "hour" : 22 }, "average" : 3.1878750000000116, "max" : 3.44, "min" : 3 }
{ "_id" : { "day" : 61, "hour" : 14 }, "average" : 13.979541666666638, "max" : 17.81, "min" : 8.81 }
...
I would like to produce output which looks like:
{"outT": { output from working aggregation above },
"inT1": { ... },
...
}
Everything I try seems to throw an error in the mongo console. Can anyone help?
Thanks
You can do this by including each statistic in your $group with a different name and then following that with a $project stage to reshape it into your desired format:
db.logs.aggregate([
{
$match: {
logtime: {
$gte: ISODate("2015-02-28T00:00:00.000Z"),
$lt: ISODate("2015-03-03T00:00:00.000Z")
}
}
},
{
$project: {_id: 0, logtime: 1, outT: 1, inT1: 1}
},
{
$group: {
_id: {
day: {$dayOfYear: "$logtime"},
hour: {$hour: "$logtime"}
},
outT_average: {$avg: "$outT"},
outT_max: {$max: "$outT"},
outT_min:{$min: "$outT"},
inT1_average: {$avg: "$inT1"},
inT1_max: {$max: "$inT1"},
inT1_min:{$min: "$inT1"}
}
},
{
$project: {
outT: {
average: '$outT_average',
max: '$outT_max',
min: '$outT_min'
},
inT1: {
average: '$inT1_average',
max: '$inT1_max',
min: '$inT1_min'
}
}
}
])
This gives you output that looks like:
{
"_id" : {
"day" : 59,
"hour" : 21
},
"outT" : {
"average" : 10.88,
"max" : 10.88,
"min" : 10.88
},
"inT1" : {
"average" : 22.78,
"max" : 22.81,
"min" : 22.75
}
}
When used as accumulators in a $group stage, $max returns the maximum of the corresponding values from all documents in each group, $min returns the minimum, and $avg returns the average.
See the MongoDB documentation for these operators for further examples.