Changing format of mongoDB aggregation result [duplicate] - mongodb

This question already has an answer here:
Promote subfields to top level in projection without listing all keys
(1 answer)
Closed 4 years ago.
Currently I am running this query to calculate averages and to return the data in a specific format:
db.metrics.aggregate([
{
$unwind:"$data"
},
{
$group:{
_id:"$data.configName",
avg:{
$avg:"$data.linesCount"
},
data:{
$last:"$data"
},
date:{
$last:"$date"
}
}
}
]).pretty()
On a collection which contains objects in this format:
{
"_id" : {
"date" : 1526569274000,
}
"date" : "20150220",
"data" : [
{
"configName" : "aaa",
"linesCount" : 500,
"insertedLinesCount" : 658,
}
],
"applicationName" : "loader"
}
Which returns this result:
{
"_id" : "aaa",
"avg" : 500,
"data" : {
"configName" : "aaa",
"linesCount" : 500,
"insertedLinesCount" : 658,
"succeeded" : true
},
"date" : "20150220"
}
The details are correct but I'd like to change the format. Is there any way to take what is in the data object and return it so that the final result is a list of 1-1 mappings, like so:
{
"_id" : "aaa",
"avg" : 500,
"configName" : "aaa",
"linesCount" : 500,
"insertedLinesCount" : 658,
"fileFormat" : "",
"date" : "20150220"
}

You need to use the $project stage at the end of the result
db.collection.aggregate([
{
$unwind: "$data"
},
{
$group: {
_id: "$data.configName",
avg: {
$avg: "$data.linesCount"
},
data: {
$last: "$data"
},
date: {
$last: "$date"
}
}
},
{
$project: {
configName: "$data.configName",
insertedLinesCount: "$data.insertedLinesCount",
linesCount: "$data.linesCount",
succeeded: "$data.succeeded",
_id: 1,
avg: 1,
date: 1
}
}
])
above query gives you the following result... check it here
[
{
"_id": "aaa",
"avg": 500,
"configName": "aaa",
"date": "20150220",
"insertedLinesCount": 658,
"linesCount": 500,
"succeeded": true
}
]

Add a $replaceRoot stage after $group
{
$replaceRoot: {
newRoot: {
_id: "$_id",
avg: "$avg",
configName: "$data.configName"
...
}
}
}

Related

In MongoDB Shell, How to query for day-by-day data for a time period (such as a month)

I want to get day wise data grouped by class (field from my collection) for a month of period.
This is what i have tried to query, but doesn't give the expected output.
query:
db.collection.aggregate([
// Get only records created in the last 30 days
{$match:{
"created_at":{$gt: new Date(ISODate().getTime() - 1000*60*60*24*30)},
}},
// Get the year, month and day from the created_at TimeStamp
{$project:{
"year":{$year:"$created_at"},
"month":{$month:"$created_at"},
"day": {$dayOfMonth:"$created_at"}
}},
// Group by year, month and day and get the count
{$group:{
_id:{year:"$year", month:"$month", day:"$day"},
"count":{$sum:1}
}},
// Group by class field
{ $group: {
_id: "$meta.class",
total: {$sum: 1}
}}
])
expected output:
{ "_id" : { "year" : 2021, "month" : 10, "day" : 01 }, "count" : {{ "_id" : "class1", "total" : 15 }, { "_id" : "class2", "total" : 25 }} }
{ "_id" : { "year" : 2021, "month" : 10, "day" : 02 }, "count" : {{ "_id" : "class2", "total" : 25 }, { "_id" : "class3", "total" : 10 }} }
...
{ "_id" : { "year" : 2021, "month" : 10, "day" : 30 }, "count" : {{ "_id" : "class3", "total" : 50 }} }
So, could someone please tell me how to attain the desired result or what I'm doing wrong?
Thanks.
---EDIT---
Sample Data:
mongoplayground link : here
[
{
"key": 1,
"_id": ObjectId("615602280000000000000000"),
"created_at": ISODate("2021-09-30T18:30:00.000Z"),
"meta": {
"class": "class1",
}
},
{
"key": 2,
"_id": ObjectId("615753a80000000000000000"),
"created_at": ISODate("2021-10-01T18:30:00.000Z"),
"meta": {
"class": "class1",
}
},
{
"key": 3,
"_id": ObjectId("615764100000000000000000"),
"created_at": ISODate("2021-10-01T19:40:00.000Z"),
"meta": {
"class": "class1",
}
},
{
"key": 4,
"_id": ObjectId("615776d00000000000000000"),
"created_at": ISODate("2021-10-01T21:00:00.000Z"),
"meta": {
"class": "class2",
}
}
]
sample o/p:
[
{
"_id":{
"year":2021,
"month":10,
"day":1
},
"count":[
{
"_id":"class1",
"total":1
}
]
},
{
"_id":{
"year":2021,
"month":10,
"day":2
},
"count":[
{
"_id":"class1",
"total":2
},
{
"_id":"class2",
"total":1
}
]
}
]
Try this one:
db.collection.aggregate([
{ $match: { created_at: { $gt: moment().startOf('day').subtract(30, 'day').toDate() } } },
{
$group: {
_id: {
day: { $dateTrunc: { date: "$created_at", unit: "day" } },
class: "$meta.class"
},
total: { $count: {} }
}
},
{
$group: {
_id: "$_id.day",
count: { $push: { total: "$total", class: "$_id.class" } }
}
}
])
Whenever I have to work with date/time values then I suggest to use the moment.js library.
$dateTrunc() requires MongoDB version 5.0. If you prefer or require year/month/day parts it should be fairly obvious how to modify the aggregation pipeline.

Summarize documents by day of the week

For each student in a collection, I have an array of absences. I want to summarize the data by displaying the number of absences for each day of the week.
Given the following input:
{
"_id" : 9373,
"absences" : [
{
"code" : "U",
"date" : ISODate("2021-01-17T00:00:00.000+0000"),
"full_day" : false,
"remote" : false,
"dayNumber" : 1,
"dayName" : "Sunday"
}
]
}
{
"_id" : 9406,
"absences" : [
{
"code" : "E",
"date" : ISODate("2020-12-09T00:00:00.000+0000"),
"full_day" : false,
"remote" : false,
"dayNumber" : 4,
"dayName" : "Wednesday"
},
{
"code" : "U",
"date" : ISODate("2021-05-27T00:00:00.000+0000"),
"full_day" : false,
"remote" : false,
"dayNumber" : 5,
"dayName" : "Thursday"
}
]
}
How can I achieve the following output:
[
{
"_id": 9373,
"days": [
{
"dayNumber": 1,
"dayName": "Sunday",
"count": 1
}
]
},
{
"_id": 9406,
"days": [
{
"dayNumber": 4,
"dayName": "Wednesday",
"count": 1
},
{
"dayNumber": 5,
"dayName": "Thursday",
"count": 1
}
]
}
]
I've pushed all the required fields to this stage of the pipeline. I'm just not clear how to roll up the data in the nested absences array.
$unwind deconstruct absences array
$group by _id and dayNumber, and get count of grouped documents
$group by _id and reconstruct days array
db.collection.aggregate([
{ $unwind: "$absences" },
{
$group: {
_id: {
_id: "$_id",
dayNumber: "$absences.dayNumber"
},
dayName: { $first: "$absences.dayName" },
count: { $sum: 1 }
}
},
{
$group: {
_id: "$_id._id",
days: {
$push: {
dayName: "$dayName",
dayNumber: "$_id.dayNumber",
count: "$count"
}
}
}
}
])
Playground

Counting results in aggregate selection

My MongoDB database have a structure
{
"_id" : ObjectId("5c1ccc20fc0f60769227d455"),
"type" : 0,
"id" : "hwJyzAHyfjXUlrGhblT7txWd",
"userowner" : 1.0,
"campid" : "9548",
"date" : 1545391136,
"useragent" : "mozilla/5.0 (windows nt 10.0; win64; x64; rv:65.0) gecko/20100101 firefox/65.0",
"domain" : "",
"referer" : "",
"country" : "en",
"language" : "en-US",
"languages" : [
"en-US",
"en"
],
"screenres" : [
"1920*1080"
],
"avscreenres" : [
"1080*1858"
],
"webgl" : "angle (nvidia geforce gtx 1060 6gb direct3d11 vs_5_0 ps_5_0)",
"hash" : 123,
"timezone" : -180,
"result" : true,
"resultreason" : "learning",
"remoteip" : "0.0.0.0"
}
Every a document have a vield "result" with a bool value.
I make aggregation selection:
db.getCollection('clicks').aggregate([
{ $match: {userowner: 1, date:{$gte: 0, $lte: 9545392055}} },
{ $group : {_id : "$campid",
number: {$sum: 1}}}
])
and get a Result:
/* 1 */
{
"_id" : "4587",
"number" : 2.0
}
/* 2 */
{
"_id" : "9548",
"number" : 1346.0
}
How can count the amount of value "true" and "false" in a field "result" and get a result like this:
/* 1 */
{
"_id" : "4587",
"number" : 2.0,
"passed":100,
"blocked":120
}
/* 2 */
{
"_id" : "9548",
"number" : 1346.0,
"passed":100,
"blocked":120
}
I hope this works as per your requirement.
db.getCollection('clicks').aggregate(
[
{
$match: {
userowner: 1, date: {
$gte: 0, $lte: 9545392055
}
}
},
{
$group: {
_id: "$campid", passed: {
$sum: {
$cond:
[
{ $eq: ["$result", true] },
1, 0
]
}
},
blocked: {
$sum: {
$cond:
[
{
$eq: ["$result", false]
}
, 1, 0]
}
},
number: { $sum: 1 }
}
},
{
$project: {
_id: 0,
campid: "$_id",
number: 1,
passed: 1,
blocked: 1
}
}
])
Output:-
{
"passed" : 3,
"blocked" : 2,
"number" : 5,
"campid" : "4587"
}
{
"passed" : 2,
"blocked" : 1,
"number" : 3,
"campid" : "9548"
}
Refer $group, $cond, and $eq for more info.
With MongoDb 3.6 and newer, you can leverage the use of $arrayToObject operator within a $replaceRoot pipeline to get the desired result.
You would need to group the documents intially by the campid and the result field, aggregate the sum and pass the results to yet another group pipeline stage. This group stage will prepare the documents in a way that $arrayToObject operator will give you the desired object by creating a key-value array using $push.
The result from this is then fed to the $replaceRoot pipeline to bring the passed and blocked fields to the root of the document.
The following aggregate pipeline describes the above:
db.getCollection('clicks').aggregate([
{ "$match": { "userowner": 1, "date": { "$gte": 0, "$lte": 9545392055 } } },
{ "$group": {
"_id": {
"campid": "$campid",
"result": { "$cond": [ "$result", "passed", "blocked" ] }
},
"count": { "$sum": 1 }
} },
{ "$group": {
"_id": "$_id.campid",
"number": { "$sum": "$count" },
"counts": {
"$push": {
"k": "$_id.result",
"v": "$count"
}
}
} },
{ "$replaceRoot": {
"newRoot": {
"$mergeObjects": [
{ "$arrayToObject": "$counts" },
"$$ROOT"
]
}
} },
{ "$project": { "counts": 0 } }
])

using mongo aggregation how to replace the fields names [duplicate]

I have large collection of documents which represent some kind of events. Collection contains events for different userId.
{
"_id" : ObjectId("57fd7d00e4b011cafdb90d22"),
"userId" : "123123123",
"userType" : "mobile",
"event_type" : "clicked_ok",
"country" : "US",
"timestamp" : ISODate("2016-10-12T00:00:00.308Z")
}
{
"_id" : ObjectId("57fd7d00e4b011cafdb90d22"),
"userId" : "123123123",
"userType" : "mobile",
"event_type" : "clicked_cancel",
"country" : "US",
"timestamp" : ISODate("2016-10-12T00:00:00.308Z")
}
At midnight I need to run aggregation for all documents for the previous day. Documents need to aggregated in the way so I could get number of different events for particular userId.
{
"userId" : "123123123",
"userType" : "mobile",
"country" : "US",
"clicked_ok" : 23,
"send_message" : 14,
"clicked_cancel" : 100,
"date" : "2016-11-24",
}
During aggregation I need to perform two things:
calculate number of events for particular userId
add "date" text fields with date
Any help is greatly appreciated! :)
you can do this with aggregation like this :
db.user.aggregate([
{
$match:{
$and:[
{
timestamp:{
$gte: ISODate("2016-10-12T00:00:00.000Z")
}
},
{
timestamp:{
$lt: ISODate("2016-10-13T00:00:00.000Z")
}
}
]
}
},
{
$group:{
_id:"$userId",
timestamp:{
$first:"$timestamp"
},
send_message:{
$sum:{
$cond:[
{
$eq:[
"$event_type",
"send_message"
]
},
1,
0
]
}
},
clicked_cancel:{
$sum:{
$cond:[
{
$eq:[
"$event_type",
"clicked_cancel"
]
},
1,
0
]
}
},
clicked_ok:{
$sum:{
$cond:[
{
$eq:[
"$event_type",
"clicked_ok"
]
},
1,
0
]
}
}
}
},
{
$project:{
date:{
$dateToString:{
format:"%Y-%m-%d",
date:"$timestamp"
}
},
userId:1,
clicked_cancel:1,
send_message:1,
clicked_ok:1
}
}
])
explanation:
keep only document for a specific day in $match stage
group doc by userId and count occurrences for each event in $group stage
finally format the timestamp field into yyyy_MM-dd format in $project stage
for the data you provided, this will output
{
"_id":"123123123",
"send_message":0,
"clicked_cancel":1,
"clicked_ok":1,
"date":"2016-10-12"
}
Check the following query
db.sandbox.aggregate([{
$group: {
_id: {
userId: "$userId",
date: {
$dateToString: { format: "%Y-%m-%d", date: "$timestamp" }}
},
send_message: {
$sum: {
$cond: { if: { $eq: ["$event_type", "send_message"] }, then: 1, else: 0 } }
},
clicked_cancel: {
$sum: {
$cond: { if: { $eq: ["$event_type", "clicked_cancel"] }, then: 1, else: 0 }
}
},
clicked_ok: {
$sum: {
$cond: { if: { $eq: ["$event_type", "clicked_ok"] }, then: 1, else: 0 }
}
}
}
}])

Aggregate Fields together

I have the following structure as an input from which data needs to be aggregated:
I need to aggregate the data such that I end up with the following structure:
start: A {
tripdetails: [{
destination: B [{
duration: 10,
type: male
},
duration: 12,
type: female
},
duration: 9,
type: female
}]
]}
}
Basically I need to group "type" and "duration" together under the same destination.
I came up with the following query, but this results in a a single field for "type" for each "destination", but not for every "duration".
db.test.aggregate(
{
$group: {
_id: {"StationID": "$start", "EndStationID": "$destination"},
durations: {$addToSet: "$duration" },
usertypes: {$addToSet: "$type" }
}
},
{
$group: {
_id: "$_id.StationID",
Tripcount_out: {$sum: "durations"},
Trips_out: { $addToSet: { EndStationID: "$_id.EndStationID", Tripduration: "$durations", Usertype: "$usertypes"} }
}
}
)
My question is how I can achieve the structure described above.
You could try running the following aggregate pipeline:
db.test.aggregate([
{
"$group": {
"_id": { "StationID": "$start", "EndStationID": "$destination" },
"details": {
"$push": {
"duration": "$duration",
"type": "$type"
}
}
}
},
{
"$group": {
"_id": "$_id.StationID",
"tripdetails": {
"$push": {
"destination": "$_id.EndStationID",
"trips": "$details"
}
}
}
}
])
which yields:
{
"_id" : "A",
"tripdetails" : [
{
"destination" : "B",
"trips" : [
{
"duration" : 10,
"type" : "male"
},
{
"duration" : 9,
"type" : "female"
},
{
"duration" : 12,
"type" : "female"
}
]
}
]
}