I've got the following query that works fine. However adding a sort to the aggregation pipeline does not actually sort it. It does change the order of the results but does not properly sort.
Any idea why it refuses to sort the results by date?
// Sums `calls` per name and calendar day inside the matched date range, then
// collects each name's daily totals into a `records` array.
db.calls.aggregate({
// Keep only documents whose compound-key date lies in the range (inclusive).
$match: {
'_id.date': {
$gte: ISODate('2014-04-13T00:00:00.000Z'),
$lte: ISODate('2014-04-20T00:00:00.000Z')
}
}
}, {
// Derive year/month/day from the date; `_id` is retained by default, which
// is why the next stage can still read `$_id.name` and `$_id.date`.
$project: {
calls: 1,
year: {
$year: '$_id.date'
},
month: {
$month: '$_id.date'
},
day: {
$dayOfMonth: '$_id.date'
}
}
}, {
// One document per (name, calendar day): total calls plus the date of the
// first document seen in that group.
$group: {
_id: {
name: "$_id.name",
day: '$day',
year: '$year',
month: '$month'
},
date: {
'$first': '$_id.date'
},
calls: {
'$sum': '$calls'
}
}
}, {
// Regroup by name only, gathering the daily totals into `records`.
// NOTE(review): $addToSet does not guarantee any element order.
$group: {
_id: '$_id.name',
records: {
$addToSet: {
date: '$date',
calls: '$calls'
}
}
}
}, {
// This orders the grouped documents, not the elements inside each `records`
// array, which is why the arrays in the output are not sorted by date.
$sort: {
'records.date': 1
}
})
Outputs:
/* 0 */
{
"result" : [
{
"_id" : "a",
"records" : [
{
"date" : ISODate("2014-04-13T00:00:00.000Z"),
"calls" : 522
},
{
"date" : ISODate("2014-04-16T00:00:00.000Z"),
"calls" : 1523
},
{
"date" : ISODate("2014-04-20T00:00:00.000Z"),
"calls" : 57
},
{
"date" : ISODate("2014-04-14T00:00:00.000Z"),
"calls" : 1540
},
{
"date" : ISODate("2014-04-15T00:00:00.000Z"),
"calls" : 1592
},
{
"date" : ISODate("2014-04-17T00:00:00.000Z"),
"calls" : 1466
},
{
"date" : ISODate("2014-04-18T00:00:00.000Z"),
"calls" : 1003
},
{
"date" : ISODate("2014-04-19T00:00:00.000Z"),
"calls" : 623
}
]
},
{
"_id" : "b",
"records" : [
{
"date" : ISODate("2014-04-15T00:00:00.000Z"),
"calls" : 102
},
{
"date" : ISODate("2014-04-17T00:00:00.000Z"),
"calls" : 97
},
{
"date" : ISODate("2014-04-16T00:00:00.000Z"),
"calls" : 116
},
{
"date" : ISODate("2014-04-13T00:00:00.000Z"),
"calls" : 118
},
{
"date" : ISODate("2014-04-14T00:00:00.000Z"),
"calls" : 142
},
{
"date" : ISODate("2014-04-19T00:00:00.000Z"),
"calls" : 68
},
{
"date" : ISODate("2014-04-18T00:00:00.000Z"),
"calls" : 100
},
{
"date" : ISODate("2014-04-20T00:00:00.000Z"),
"calls" : 6
}
]
},
{
"_id" : "c",
"records" : [
{
"date" : ISODate("2014-04-17T00:00:00.000Z"),
"calls" : 137130
},
{
"date" : ISODate("2014-04-14T00:00:00.000Z"),
"calls" : 139497
},
{
"date" : ISODate("2014-04-13T00:00:00.000Z"),
"calls" : 92166
},
{
"date" : ISODate("2014-04-18T00:00:00.000Z"),
"calls" : 123129
},
{
"date" : ISODate("2014-04-15T00:00:00.000Z"),
"calls" : 146390
},
{
"date" : ISODate("2014-04-20T00:00:00.000Z"),
"calls" : 4515
},
{
"date" : ISODate("2014-04-16T00:00:00.000Z"),
"calls" : 141792
},
{
"date" : ISODate("2014-04-19T00:00:00.000Z"),
"calls" : 104847
}
]
}
],
"ok" : 1
}
Document structure:
{
"_id" : {
"date" : ISODate("2014-04-08T12:00:00.000Z"),
"name" : "a"
},
"calls" : 515
}
Fixed by doing a $sort before the last group and switching out $addToSet with $push so the order is preserved.
// Daily call totals per name, with every name's `records` array in
// ascending date order.
db.calls.aggregate([
  // Restrict to the requested date range (both bounds inclusive).
  {
    $match: {
      "_id.date": {
        $gte: ISODate("2014-04-13T00:00:00.000Z"),
        $lte: ISODate("2014-04-20T00:00:00.000Z")
      }
    }
  },
  // Break the date into calendar parts used as the daily grouping key.
  {
    $project: {
      calls: 1,
      year: { $year: "$_id.date" },
      month: { $month: "$_id.date" },
      day: { $dayOfMonth: "$_id.date" }
    }
  },
  // Total the calls for each (name, day) pair and remember one date for it.
  {
    $group: {
      _id: { name: "$_id.name", day: "$day", year: "$year", month: "$month" },
      date: { $first: "$_id.date" },
      calls: { $sum: "$calls" }
    }
  },
  // Sort BEFORE the final grouping so the documents reach $push in date order.
  { $sort: { date: 1 } },
  // $push (unlike $addToSet) appends in arrival order, keeping the sort.
  {
    $group: {
      _id: "$_id.name",
      records: { $push: { date: "$date", calls: "$calls" } }
    }
  }
])
Related
I have a schema named orders which looks like this :
{
"_id" : ObjectId("5cd42f7b16c2654ea9138ece"),
"customerId" : ObjectId("5c8222109146d119ccc5243f"),
"orderAmount" : NumberInt(10000),
"paidAmount" : NumberInt(4000),
"installments" : [
{
"dueDate" : ISODate("2020-01-01"),
"amount" : NumberInt(2000)
},
{
"dueDate" : ISODate("2020-01-07"),
"amount" : NumberInt(6000)
},
{
"dueDate" : ISODate("2020-01-04"),
"amount" : NumberInt(2000)
}
]
}
I want to write an aggregation function that sorts the installments according to dueDate and mark them paid according to paidAmount. For example for this case the function should return
{
"_id" : ObjectId("5cd42f7b16c2654ea9138ece"),
"customerId" : ObjectId("5c8222109146d119ccc5243f"),
"orderAmount" : NumberInt(10000),
"paidAmount" : NumberInt(4000),
"installments" : [
{
"dueDate" : ISODate("2020-01-01"),
"amount" : NumberInt(2000),
"paid" : true
},
{
"dueDate" : ISODate("2020-01-04"),
"amount" : NumberInt(2000),
"paid" : true
},
{
"dueDate" : ISODate("2020-01-07"),
"amount" : NumberInt(6000),
"paid" : false
}
]
}
Now I can sort the array using the $unwind and $sort functions like this:
// Flatten the installments array, then order the unwound documents by each
// installment's due date. After $unwind the date lives at
// `installments.dueDate`; there is no top-level `dueDate` field, so sorting
// on that key would not order the documents by due date.
db.orders.aggregate([
  { $unwind: "$installments" },
  { $sort: { "installments.dueDate": 1 } }
]);
What I am stuck on is how to group the array back so that it gives me the desired result. I can only use aggregation here.
You need to $group installments. But, if you need to put paid field with some logic, it's necessary to add extra pipeline stages.
ASSUMPTION
paidAmount value calculated by ordered installments.[].paid
paidAmount installments.[].paid
4000 <= 2000(t) 2000(t) 6000(f)
4000 <≠ 2000(t) 6000(f) 2000(f)
4000 <≠ 6000(f) 2000(f) 2000(f)
6000 <= 6000(t) 2000(f) 2000(f)
6000 <≠ 1000(t) 6000(f) 1000(f)
6000 <= 1000(t) 4000(t) 1000(t)
EXPLANATION paid:true|false LOGIC
We order installments and create extra tmp field with installments value (for paid field).
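For each installment item $i$ (0-based, in due-date order), we mark paid: true if $\text{paidAmount} - \sum_{j=0}^{i} \text{amount}_j \ge 0$, i.e. if the running total of the installment amounts up to and including item $i$ does not exceed paidAmount.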
// Sorts every order's installments by due date and adds a `paid` flag to each
// installment: true while the running total of amounts (in due-date order)
// still fits inside the order's paidAmount.
db.orders.aggregate([
// One document per installment.
{
$unwind: "$installments"
},
{
$sort: {
"installments.dueDate": 1
}
},
// Rebuild one document per order. `orders` keeps the first unwound document
// so the top-level fields (customerId, orderAmount, paidAmount) stay
// available; `tmp` is a second copy of the sorted installments that survives
// the next $unwind and is used to compute the running total.
{
$group: {
_id: "$_id",
orders: {
$first: "$$ROOT"
},
installments: {
$push: "$installments"
},
tmp: {
$push: "$installments"
}
}
},
// Unwind again so each installment can be flagged individually.
{
$unwind: "$installments"
},
{
$addFields: {
"installments.paid": {
$cond: [
{
// paid when (paidAmount - sum of amounts up to this installment) >= 0
$gte: [
{
$reduce: {
// The first (index + 1) entries of tmp, where index is the position of the
// current installment in tmp.
// NOTE(review): $indexOfArray returns the first match, so two installments
// that are identical in every field would share one index - confirm this
// cannot occur in the data.
input: {
$slice: [
"$tmp",
{
$sum: [
{
$indexOfArray: [
"$tmp",
"$installments"
]
},
1
]
}
]
},
// Start from the amount paid and subtract each installment amount in turn.
initialValue: "$orders.paidAmount",
in: {
$sum: [
{
$multiply: [
"$$this.amount",
-1
]
},
"$$value"
]
}
}
},
0
]
},
true,
false
]
}
}
},
// Reassemble the order with its original top-level fields and the flagged
// installments.
// NOTE(review): presumably relies on documents still arriving in due-date
// order from the earlier $sort - verify.
{
$group: {
_id: "$_id",
customerId: {
$first: "$orders.customerId"
},
orderAmount: {
$first: "$orders.orderAmount"
},
paidAmount: {
$first: "$orders.paidAmount"
},
installments: {
$push: "$installments"
}
}
}
])
MongoPlayground
Try this :
// Sorts each order's installments by due date, adds a `paid` flag to every
// installment, and returns the order document with the rebuilt array.
db.yourCollectionName.aggregate([
  // One document per installment, ordered by due date.
  { $unwind: '$installments' },
  { $sort: { 'installments.dueDate': 1 } },
  // NOTE(review): this compares each installment's own amount with paidAmount;
  // it is not a running total - confirm that matches the intended rule.
  { $addFields: { 'installments.paid': { $cond: [{ $lte: ['$installments.amount', '$paidAmount'] }, true, false] } } },
  // Group by the order's `_id`. A missing field such as `$id` resolves to null
  // and would merge the installments of ALL orders into a single document.
  { $group: { _id: '$_id', data: { $first: '$$ROOT' }, installments: { $push: '$installments' } } },
  // Put the rebuilt array back on the saved order document and promote it.
  { $addFields: { 'data.installments': '$installments' } },
  { $replaceRoot: { newRoot: '$data' } }
])
Collection Data :
/* 1 */
{
"_id" : ObjectId("5cd42f7b16c2654ea9138ece"),
"customerId" : ObjectId("5c8222109146d119ccc5243f"),
"orderAmount" : 10000,
"paidAmount" : 4000,
"installments" : [
{
"dueDate" : ISODate("2020-01-01T21:21:20.202Z"),
"amount" : 2000
},
{
"dueDate" : ISODate("2020-01-07T21:27:20.202Z"),
"amount" : 6000
},
{
"dueDate" : ISODate("2020-01-04T21:24:20.202Z"),
"amount" : 2000
}
]
}
Result :
/* 1 */
{
"_id" : ObjectId("5cd42f7b16c2654ea9138ece"),
"customerId" : ObjectId("5c8222109146d119ccc5243f"),
"orderAmount" : 10000,
"paidAmount" : 4000,
"installments" : [
{
"dueDate" : ISODate("2020-01-01T21:21:20.202Z"),
"amount" : 2000,
"paid" : true
},
{
"dueDate" : ISODate("2020-01-04T21:24:20.202Z"),
"amount" : 2000,
"paid" : true
},
{
"dueDate" : ISODate("2020-01-07T21:27:20.202Z"),
"amount" : 6000,
"paid" : false
}
]
}
Ref : aggregation-pipeline
I have a mongodb database with a collection user_arrival. The documents look like:
{
"_id" : ObjectId("5e0431821d0f986bdb338e82"),
"location" : {
"uuid" : "b8e671f3-742e-4a24-ae3e-6cd543e7c5bb",
"name" : "London"
},
"time" : ISODate("2019-12-16T16:37:45.000Z"),
"actor" : {
"user" : {
"uuid" : "c42f7cbf-84a6-4912-b414-6afc873e229d",
"name" : "User 1"
}
}
},
{
"_id" : ObjectId("5e0431d31d0f986bdb338e83"),
"location" : {
"uuid" : "b8e671f3-742e-4a24-ae3e-6cd543e7c5bb",
"name" : "London"
},
"time" : ISODate("2019-12-16T17:00:00.000Z"),
"actor" : {
"user" : {
"uuid" : "c42f7cbf-84a6-4912-b414-6afc873e229d",
"name" : "User 1"
}
}
},
{
"_id" : ObjectId("5e0431e41d0f986bdb338e84"),
"location" : {
"uuid" : "b8e671f3-742e-4a24-ae3e-6cd543e7c5bb",
"name" : "London"
},
"time" : ISODate("2019-12-16T17:05:00.000Z"),
"actor" : {
"user" : {
"uuid" : "c42f7cbf-84a6-4912-b414-6afc873e229d",
"name" : "User 1"
}
}
},
{
"_id" : ObjectId("5e04320f1d0f986bdb338e85"),
"location" : {
"uuid" : "b8e671f3-742e-4a24-ae3e-6cd543e7c5bb",
"name" : "London"
},
"time" : ISODate("2019-12-16T17:06:00.000Z"),
"actor" : {
"user" : {
"uuid" : "d42f7cbf-84a6-4912-b414-6afc873e229d",
"name" : "User 2"
}
}
},
{
"_id" : ObjectId("5e0432191d0f986bdb338e86"),
"location" : {
"uuid" : "b8e671f3-742e-4a24-ae3e-6cd543e7c5bb",
"name" : "London"
},
"time" : ISODate("2019-12-16T17:15:00.000Z"),
"actor" : {
"user" : {
"uuid" : "d42f7cbf-84a6-4912-b414-6afc873e229d",
"name" : "User 2"
}
}
}
According to above documents
User 1 - First-time card scan at 16:37:45
User 1 - Second-time card scan at 17:00:00
User 1 - Third-time card scan at 17:05:00
User 2 - First-time card scan at 17:06:00
User 2 - Second-time card scan at 17:15:00
Mongodb Group Query
// Counts the distinct users that scanned at one location, bucketed by the
// hour of the scan, within a one-day window.
db.getCollection('test').aggregate([
{
$match: {
time: { $gt: new Date('2019-12-16T00:00:00.000Z'), $lte: new Date('2019-12-17T00:00:00.000Z') },
'location.uuid': 'b8e671f3-742e-4a24-ae3e-6cd543e7c5bb',
},
},
{
$group: {
_id: {
hour: { $hour: { date: '$time' } },
// minute - (minute mod 60) is always 0, so the effective key is the hour.
minute: {
$subtract: [
{ $minute: { date: '$time'} },
{ $mod: [{ $minute: '$time' }, 60] },
],
},
},
// Set of distinct user uuids seen in this hour. Every scan is considered, so
// a user who scans in several hours is counted in each of them.
count: { $addToSet: '$actor.user.uuid' },
},
},
{
$project: {
// Adding 0 simply copies the hour; the bucket ends one hour later.
start_time: { $add: ['$_id.hour', 0] },
end_time: { $add: ['$_id.hour', 1] },
total_user_arrivals: { $size: '$count' },
_id: 0,
},
},
])
When I execute the above MongoDB query, I get the results below.
[{
"start_time" : 16.0,
"end_time" : 17.0,
"total_user_arrivals" : 1
},
{
"start_time" : 17.0,
"end_time" : 18.0,
"total_user_arrivals" : 2
}]
Expected Result
User 1 first-time card scan between 16:00:00 to 17:00:00
User 2 first-time card scan between 17:00:00 to 18:00:00
[{
"start_time" : 16.0,
"end_time" : 17.0,
"total_user_arrivals" : 1
},
{
"start_time" : 17.0,
"end_time" : 18.0,
"total_user_arrivals" : 1
}]
Before grouping, you need to get the first transaction of the users.
// Counts users by the hour of their FIRST scan at one location within a
// one-day window, so each user contributes to exactly one hourly bucket.
db.getCollection('test').aggregate([
  {
    $match: {
      time: { $gt: new Date('2019-12-16T00:00:00.000Z'), $lte: new Date('2019-12-17T00:00:00.000Z') },
      'location.uuid': 'b8e671f3-742e-4a24-ae3e-6cd543e7c5bb',
    },
  },
  // Oldest scan first, so $first below picks each user's earliest scan.
  { $sort: { time: 1 } },
  {
    $group: {
      _id: {
        uuid: '$actor.user.uuid',
      },
      tran: { $first: '$$ROOT' },
    },
  },
  // Bucket the first scans by hour. The previous `minute` key
  // (minute - minute mod 60) was always 0 and was never read by $project,
  // so it is omitted; the final output is unchanged.
  {
    $group: {
      _id: {
        hour: { $hour: { date: '$tran.time' } },
      },
      count: { $addToSet: '$tran.actor.user.uuid' },
    },
  },
  {
    $project: {
      start_time: { $add: ['$_id.hour', 0] },
      end_time: { $add: ['$_id.hour', 1] },
      total_user_arrivals: { $size: '$count' },
      _id: 0,
    },
  },
])
I can use this query to get the average sqmPrice for a myArea
// Average sqmPrice for every distinct myArea in the `sold` collection.
db.getCollection('sold').aggregate([
  // Empty filter: every document passes through.
  { $match: {} },
  // One result per area; the accumulator name is quoted in the original but
  // the key is the same.
  {
    $group: {
      _id: "$myArea",
      sqmPrice: { $avg: "$sqmPrice" }
    }
  }
])
Output:
[
{
"_id" : "Yttre Aspudden",
"sqmPrice" : 48845.7777777778
},
{
"_id" : "Hägerstensåsen",
"sqmPrice" : 120
}
]
I would like to group this by year, ideally an object that looks like this:
{
"Yttre Aspudden": {
2008: 1232,
2009: 1244
...
}
...
}
but the formatting is not the most important.
Here is a sample object, I would like to use soldDate:
{
"_id" : ObjectId("5beca41c78f21248ab47f4a6"),
"location" : {
"address" : {
"streetAddress" : "Ljusstöparbacken 26C"
},
"position" : {
"latitude" : 59.31427884,
"longitude" : 18.00892421
},
"namedAreas" : [
"Hägersten-Liljeholmen"
],
"region" : {
"municipalityName" : "Stockholm",
"countyName" : "Stockholms län"
},
"distance" : {
"ocean" : 3777
}
},
"listPrice" : 1895000,
"rent" : 1959,
"floor" : 1,
"livingArea" : 38.5,
"source" : {
"name" : "Fastighetsbyrån",
"id" : 1573,
"type" : "Broker",
"url" : "http://www.fastighetsbyran.se/"
},
"rooms" : 1.5,
"published" : ISODate("2018-11-02T20:55:19.000Z"),
"constructionYear" : 1959,
"objectType" : "Lägenhet",
"booliId" : 3278478,
"soldDate" : ISODate("2018-11-14T00:00:00.000Z"),
"soldPrice" : 2620000,
"soldPriceSource" : "bid",
"url" : "https://www.booli.se/annons/3278478",
"publishedDays" : 1735,
"soldDays" : 1747,
"daysUp" : 160,
"street" : "Ljusstöparbacken",
"streetYear" : "Ljusstöparbacken Hägersten-Liljeholmen 1959",
"yearDay" : 318,
"yearWeek" : 46,
"roughSize" : 40,
"sqmPrice" : 49221,
"myArea" : "Gröndal",
"hotlist" : true
}
You need to generate your keys dynamically so you have to use $arrayToObject. To build an object which aggregates the data you need three $group stages and to create new root of your document you can use $replaceRoot, try:
// Builds one document shaped as { <area>: { <year>: <average sqmPrice> } }.
db.sold.aggregate([
  // 1) Average sqmPrice for every (area, sale year) pair.
  {
    $group: {
      _id: { area: "$myArea", year: { $year: "$soldDate" } },
      sqmPrice: { $avg: "$sqmPrice" }
    }
  },
  // 2) Per area, collect { k, v } pairs; $arrayToObject needs string keys,
  //    hence the $toString on the year.
  {
    $group: {
      _id: "$_id.area",
      avgs: { $push: { k: { $toString: "$_id.year" }, v: "$sqmPrice" } }
    }
  },
  // 3) Gather every area into one document, turning each area's pairs into
  //    a { year: average } object.
  {
    $group: {
      _id: null,
      areas: { $push: { k: "$_id", v: { $arrayToObject: "$avgs" } } }
    }
  },
  // Promote the { area: {...} } object to be the result document itself.
  { $replaceRoot: { newRoot: { $arrayToObject: "$areas" } } }
])
Hello, I am working on a reporting API whose output will be used in Highcharts, and I am new to MongoDB.
Below is my aggregation query (suggest me modification) :
// Total quantity sold for each (year, month, product) combination.
db.product_sold.aggregate([
  {
    $group: {
      _id: {
        year: { $year: "$solddate" },
        month: { $month: "$solddate" },
        productid: "$productid"
      },
      totalQty: { $sum: "$qty" }
    }
  }
])
Output:
{
"_id" : {
"year" : NumberInt(2019),
"month" : NumberInt(2),
"productid" : "11"
},
"totalQty" : 6.0
}
// ----------------------------------------------
{
"_id" : {
"year" : NumberInt(2019),
"month" : NumberInt(2),
"productid" : "14"
},
"totalQty" : 7.0
}
// ----------------------------------------------
{
"_id" : {
"year" : NumberInt(2019),
"month" : NumberInt(1),
"productid" : "13"
},
"totalQty" : 3.0
}
// ----------------------------------------------
{
"_id" : {
"year" : NumberInt(2019),
"month" : NumberInt(2),
"productid" : "10"
},
"totalQty" : 6.0
}
// ----------------------------------------------
{
"_id" : {
"year" : NumberInt(2018),
"month" : NumberInt(2),
"productid" : "12"
},
"totalQty" : 5.0
}
// ----------------------------------------------
{
"_id" : {
"year" : NumberInt(2019),
"month" : NumberInt(2),
"productid" : "15"
},
"totalQty" : 8.0
}
// ----------------------------------------------
{
"_id" : {
"year" : NumberInt(2019),
"month" : NumberInt(1),
"productid" : "11"
},
"totalQty" : 2.0
}
// ----------------------------------------------
What I want in output is something like :
status: 200,
msg: "SUCCESS"
data: [{
1:[
{
"productid": 11,
"totalQty": 3
},
{
"productid": 12,
"totalQty": 27
}
],
2:[
{
"productid": 11,
"totalQty": 64
},
{
"productid": 12,
"totalQty": 10
}
]
}]
For reference attaching the image of the collection
Is there any way to achieve it using aggregation or anything else or I will have to do it manually by code ?
You can append the aggregation stages below to your current pipeline:
// Reshapes the per-(year, month, product) totals into a single document whose
// `data` object maps each month number to that month's product totals.
db.product_sold.aggregate([
// your current $group stage
// (placeholder: the stages below read `_id.month`, `_id.productid` and
// `totalQty`, which that stage produces)
{
// Collect the product totals of each month.
// NOTE(review): the key is the month only, so the same month of different
// years ends up in one bucket - confirm that is intended.
$group: {
_id: "$_id.month",
docs: { $push: { productid: "$_id.productid", totalQty: "$totalQty" } }
}
},
{
// Shape every month as a { k, v } pair; $arrayToObject needs a string key.
$project: {
_id: 0,
k: { $toString: "$_id" },
v: "$docs"
}
},
{
// _id: null puts all months into a single document.
$group: {
_id: null,
data: { $push: "$$ROOT" }
}
},
{
// Turn the array of { k, v } pairs into one object keyed by month.
$project: {
_id: 0,
data: { $arrayToObject: "$data" }
}
}
])
The idea here is that you can use $group with _id set to null to get all the data into single document and then use $arrayToObject to get month number as key and all the aggregates for that month as value.
I have a MongoDB query I am trying to figure out. I'd like to group my data by date and one other field (portfolio) and get the counts for each buildResult in that grouping.
Sample data looks like this:
{
"_id" : ObjectId("52dea764e4b0a491abb54102"),
"type" : "build",
"time" : ISODate("2014-01-21T16:59:16.642Z"),
"data" : {
"buildNumber" : 35,
"buildDuration" : 1034300,
"portfolio" : "Shared",
"buildResult" : "FAILURE"
}
}
{
"_id" : ObjectId("52dea7b7e4b0a491abb54103"),
"type" : "build",
"time" : ISODate("2014-01-21T17:00:39.617Z"),
"data" : {
"buildNumber" : 13,
"buildDuration" : 1186028,
"portfolio" : "Sample",
"buildResult" : "SUCCESS"
}
}
The output I am shooting for would be something like this:
{
"result" : [
{
"_id" : {
"month" : 2,
"day" : 28,
"year" : 2014,
"portfolio" : "Shared"
},
"aborted" : 3,
"failure" : 0,
"unstable" : 0,
"success" : 34
},
{
"_id" : {
"month" : 2,
"day" : 28,
"year" : 2014,
"portfolio" : "Sample"
},
"aborted" : 3,
"failure" : 2,
"unstable" : 0,
"success" : 37
}
],
"ok" : 1
}
My current query is:
// Attempts to count builds per (day, portfolio), split by buildResult.
db.builds.aggregate([
// Skip documents without a build result.
{ $match: { "data.buildResult" : { $ne : null} }},
{ $group: {
_id: {
month: { $month: "$time" },
day: { $dayOfMonth: "$time" },
year: { $year: "$time" },
portfolio: "$data.portfolio",
},
// NOTE(review): these four lines are the part the question is about.
// `{ "$data.buildResult": "ABORTED" }` is not an aggregation expression that
// yields a number, so it cannot serve as a conditional count for $sum.
aborted: { $sum: { "$data.buildResult": "ABORTED" } },
failure: { $sum: { "$data.buildResult": "FAILURE" } },
unstable: { $sum: { "$data.buildResult": "UNSTABLE" } },
success: { $sum: { "$data.buildResult": "SUCCESS" } }
} },
// NOTE(review): sorts by day first, then month, then year; chronological
// order would list year, month, day - confirm which is wanted.
{ $sort: { "_id.day": 1, "_id.month": 1, "_id.year": 1 } }
])
I have tried many variations with the following lines including $match, $in and other operators. Any help would be very appreciated.
aborted: { $sum: { "$data.buildResult": "ABORTED" } },
failure: { $sum: { "$data.buildResult": "FAILURE" } },
unstable: { $sum: { "$data.buildResult": "UNSTABLE" } },
success: { $sum: { "$data.buildResult": "SUCCESS" } }
To achieve that you can use the $cond and $eq operators like this:
aborted: {$sum: {$cond : [{$eq : ["$data.buildResult", "ABORTED"]}, 1, 0]}}
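Edit:
It was suggested in the comments (by Neil Lunn) that the $cond here is irrelevant. However, the $eq operator returns a boolean (true/false), not 0 or 1, and $sum ignores non-numeric values, so the $cond is needed to turn the comparison into a number. The shortened form below would always produce 0:
aborted: {$sum: {$eq : ["$data.buildResult", "ABORTED"]}}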