I have a mongodb database with a collection user_arrival. The documents look like:
{
"_id" : ObjectId("5e0431821d0f986bdb338e82"),
"location" : {
"uuid" : "b8e671f3-742e-4a24-ae3e-6cd543e7c5bb",
"name" : "London"
},
"time" : ISODate("2019-12-16T16:37:45.000Z"),
"actor" : {
"user" : {
"uuid" : "c42f7cbf-84a6-4912-b414-6afc873e229d",
"name" : "User 1"
}
}
},
{
"_id" : ObjectId("5e0431d31d0f986bdb338e83"),
"location" : {
"uuid" : "b8e671f3-742e-4a24-ae3e-6cd543e7c5bb",
"name" : "London"
},
"time" : ISODate("2019-12-16T17:00:00.000Z"),
"actor" : {
"user" : {
"uuid" : "c42f7cbf-84a6-4912-b414-6afc873e229d",
"name" : "User 1"
}
}
},
{
"_id" : ObjectId("5e0431e41d0f986bdb338e84"),
"location" : {
"uuid" : "b8e671f3-742e-4a24-ae3e-6cd543e7c5bb",
"name" : "London"
},
"time" : ISODate("2019-12-16T17:05:00.000Z"),
"actor" : {
"user" : {
"uuid" : "c42f7cbf-84a6-4912-b414-6afc873e229d",
"name" : "User 1"
}
}
},
{
"_id" : ObjectId("5e04320f1d0f986bdb338e85"),
"location" : {
"uuid" : "b8e671f3-742e-4a24-ae3e-6cd543e7c5bb",
"name" : "London"
},
"time" : ISODate("2019-12-16T17:06:00.000Z"),
"actor" : {
"user" : {
"uuid" : "d42f7cbf-84a6-4912-b414-6afc873e229d",
"name" : "User 2"
}
}
},
{
"_id" : ObjectId("5e0432191d0f986bdb338e86"),
"location" : {
"uuid" : "b8e671f3-742e-4a24-ae3e-6cd543e7c5bb",
"name" : "London"
},
"time" : ISODate("2019-12-16T17:15:00.000Z"),
"actor" : {
"user" : {
"uuid" : "d42f7cbf-84a6-4912-b414-6afc873e229d",
"name" : "User 2"
}
}
}
According to above documents
User 1 - First-time card scan at 16:37:45
User 1 - Second-time card scan at 17:00:00
User 1 - Third-time card scan at 17:05:00
User 2 - First-time card scan at 17:06:00
User 2 - Second-time card scan at 17:15:00
Mongodb Group Query
db.getCollection('test').aggregate([
{
$match: {
time: { $gt: new Date('2019-12-16T00:00:00.000Z'), $lte: new Date('2019-12-17T00:00:00.000Z') },
'location.uuid': 'b8e671f3-742e-4a24-ae3e-6cd543e7c5bb',
},
},
{
$group: {
_id: {
hour: { $hour: { date: '$time' } },
minute: {
$subtract: [
{ $minute: { date: '$time'} },
{ $mod: [{ $minute: '$time' }, 60] },
],
},
},
count: { $addToSet: '$actor.user.uuid' },
},
},
{
$project: {
start_time: { $add: ['$_id.hour', 0] },
end_time: { $add: ['$_id.hour', 1] },
total_user_arrivals: { $size: '$count' },
_id: 0,
},
},
])
When I execute the above MongoDB query below results get.
[{
"start_time" : 16.0,
"end_time" : 17.0,
"total_user_arrivals" : 1
},
{
"start_time" : 17.0,
"end_time" : 18.0,
"total_user_arrivals" : 2
}]
Expexted Result
User 1 first-time card scan between 16:00:00 to 17:00:00
User 2 first-time card scan between 17:00:00 to 18:00:00
[{
"start_time" : 16.0,
"end_time" : 17.0,
"total_user_arrivals" : 1
},
{
"start_time" : 17.0,
"end_time" : 18.0,
"total_user_arrivals" : 1
}]
Before grouping, you need to get the first transaction of the users.
db.getCollection('test').aggregate([
{
$match: {
time: { $gt: new Date('2019-12-16T00:00:00.000Z'), $lte: new Date('2019-12-17T00:00:00.000Z') },
'location.uuid': 'b8e671f3-742e-4a24-ae3e-6cd543e7c5bb',
},
},
{ $sort: { time: 1 } },
{
$group: {
_id: {
uuid: '$actor.user.uuid',
},
tran: { $first: '$$ROOT' },
},
},
{
$group: {
_id: {
hour: { $hour: { date: '$tran.time' } },
minute: {
$subtract: [
{ $minute: { date: '$tran.time'} },
{ $mod: [{ $minute: '$tran.time' }, 60] },
],
},
},
count: { $addToSet: '$tran.actor.user.uuid' },
},
},
{
$project: {
start_time: { $add: ['$_id.hour', 0] },
end_time: { $add: ['$_id.hour', 1] },
total_user_arrivals: { $size: '$count' },
_id: 0,
},
},
])
Related
I have a schema named orders which looks like this :
{
"_id" : ObjectId("5cd42f7b16c2654ea9138ece"),
"customerId" : ObjectId("5c8222109146d119ccc5243f"),
"orderAmount" : NumberInt(10000),
"paidAmount" : NumberInt(4000),
"installments" : [
{
"dueDate" : ISODate("2020-01-01"),
"amount" : NumberInt(2000)
},
{
"dueDate" : ISODate("2020-01-07"),
"amount" : NumberInt(6000)
},
{
"dueDate" : ISODate("2020-01-04"),
"amount" : NumberInt(2000)
}
]
}
I want to write an aggregation function that sorts the installments according to dueDate and mark them paid according to paidAmount. For example for this case the function should return
{
"_id" : ObjectId("5cd42f7b16c2654ea9138ece"),
"customerId" : ObjectId("5c8222109146d119ccc5243f"),
"orderAmount" : NumberInt(10000),
"paidAmount" : NumberInt(4000),
"installments" : [
{
"dueDate" : ISODate("2020-01-01"),
"amount" : NumberInt(2000),
"paid" : true
},
{
"dueDate" : ISODate("2020-01-04"),
"amount" : NumberInt(2000),
"paid" : true
},
{
"dueDate" : ISODate("2020-01-07"),
"amount" : NumberInt(6000),
"paid" : false
}
]
}
Now I can sort the array using the $unwind and $sort functions like this:
db.orders.aggregate([
{$unwind : "$installments"},
{$sort : {"dueDate" : 1}}
]);
What I am stuck on is how to group the array back so that it gives me the desired result. I can only use aggregation here.
You need to $group installments. But, if you need to put paid field with some logic, it's necessary to add extra pipeline stages.
ASSUMPTION
paidAmount value calculated by ordered installments.[].paid
paidAmount installments.[].paid
4000 <= 2000(t) 2000(t) 6000(f)
4000 <≠ 2000(t) 6000(f) 2000(f)
4000 <≠ 6000(f) 2000(f) 2000(f)
6000 <= 6000(t) 2000(f) 2000(f)
6000 <≠ 1000(t) 6000(f) 1000(f)
6000 <= 1000(t) 4000(t) 1000(t)
EXPLANATION paid:true|false LOGIC
We order installments and create extra tmp field with installments value (for paid field).
For each installments item i, we mark paid:true if paidAmount - sum(amount0 - i) >= 0.
db.orders.aggregate([
{
$unwind: "$installments"
},
{
$sort: {
"installments.dueDate": 1
}
},
{
$group: {
_id: "$_id",
orders: {
$first: "$$ROOT"
},
installments: {
$push: "$installments"
},
tmp: {
$push: "$installments"
}
}
},
{
$unwind: "$installments"
},
{
$addFields: {
"installments.paid": {
$cond: [
{
$gte: [
{
$reduce: {
input: {
$slice: [
"$tmp",
{
$sum: [
{
$indexOfArray: [
"$tmp",
"$installments"
]
},
1
]
}
]
},
initialValue: "$orders.paidAmount",
in: {
$sum: [
{
$multiply: [
"$$this.amount",
-1
]
},
"$$value"
]
}
}
},
0
]
},
true,
false
]
}
}
},
{
$group: {
_id: "$_id",
customerId: {
$first: "$orders.customerId"
},
orderAmount: {
$first: "$orders.orderAmount"
},
paidAmount: {
$first: "$orders.paidAmount"
},
installments: {
$push: "$installments"
}
}
}
])
MongoPlayground
Try this :
db.yourCollectionName.aggregate([{ $unwind: '$installments' },{ $sort: { 'installments.dueDate': 1 } },
{ $addFields: { 'installments.paid': { $cond: [{ $lte: ["$installments.amount", '$paidAmount'] }, true, false] } } },
{ $group: { _id: '$id', data: { $first: '$$ROOT' }, installments: { $push: '$installments' } } },
{ $addFields: { 'data.installments': '$installments' } },
{ $replaceRoot: { newRoot: "$data" } }])
Collection Data :
/* 1 */
{
"_id" : ObjectId("5cd42f7b16c2654ea9138ece"),
"customerId" : ObjectId("5c8222109146d119ccc5243f"),
"orderAmount" : 10000,
"paidAmount" : 4000,
"installments" : [
{
"dueDate" : ISODate("2020-01-01T21:21:20.202Z"),
"amount" : 2000
},
{
"dueDate" : ISODate("2020-01-07T21:27:20.202Z"),
"amount" : 6000
},
{
"dueDate" : ISODate("2020-01-04T21:24:20.202Z"),
"amount" : 2000
}
]
}
Result :
/* 1 */
{
"_id" : ObjectId("5cd42f7b16c2654ea9138ece"),
"customerId" : ObjectId("5c8222109146d119ccc5243f"),
"orderAmount" : 10000,
"paidAmount" : 4000,
"installments" : [
{
"dueDate" : ISODate("2020-01-01T21:21:20.202Z"),
"amount" : 2000,
"paid" : true
},
{
"dueDate" : ISODate("2020-01-04T21:24:20.202Z"),
"amount" : 2000,
"paid" : true
},
{
"dueDate" : ISODate("2020-01-07T21:27:20.202Z"),
"amount" : 6000,
"paid" : false
}
]
}
Ref : aggregation-pipeline
I almost got this one working, but I simply cannot figure out why the $project part does not work for normal fields....
This is "invoice" table:
{
"_id" : "AS6D0",
"invoiceNumber" : 23,
"bookingId" : "AS6D0",
"createDate" : 1490369414,
"dueDate" : 1490369414,
"invoiceLines" : [
{
"lineText" : "Rent Price",
"amountPcs" : "8 x 7500",
"amountTotal" : 60000
},
{
"lineText" : "Discount(TIKO10)",
"amountPcs" : "10%",
"amountTotal" : -10000
},
{
"lineText" : "Final cleaning",
"amountPcs" : "1 x 5000",
"amountTotal" : 5000
},
{
"lineText" : "Reservation fee paid already",
"amountPcs" : "1 x -20000",
"amountTotal" : -20000
}
],
"managerId" : "4M4KE"
}
And this is my query
db.getCollection('invoice').aggregate([
{
$match: {
bookingId: "AS6D0"
}
},
{
$unwind: "$invoiceLines"
},
{
$group: {
_id: "$_id",
sum: {$sum: "$invoiceLines.amountTotal"}
}
},
{
$project:{
"_id" : 0,
"invoiceNumber" : 1,
"dueDate" : 1,
"sum" : 1
}
}
])
I get the _id and the sum, but it wont show invoiceNumber and dueDate
You could use a trick like this :
db.getCollection('invoice').aggregate([
{ $match: { } },
{ $unwind: "$invoiceLines" },
{ $group: { _id: "$_id",
sum: {$sum: "$invoiceLines.amountTotal"},
invoiceNumber: { $addToSet: "$invoiceNumber" },
dueDate: { $addToSet: "$dueDate" } } }
]);
Thanks to Mateo, this is what I ended up with:
(I do the unwind on the fields to avoid single value arrays)
Update : You don't have to $addToSet to reduce the fields into single value arrays and $unwind. Use $first instead.
db.getCollection('invoice').aggregate([
{
$match: {
bookingId: "AS6D0"
}
},
{
$unwind: "$invoiceLines"
},
{
$group: {
_id: "$_id",
sum: {$sum: "$invoiceLines.amountTotal"},
invoiceNumber: { $first: "$invoiceNumber" },
dueDate: { $first: "$dueDate" }
}
},
{
$project:{
"_id" : 0,
"invoiceNumber" : 1,
"dueDate" : 1,
"sum" : 1
}
}
])
I have the following collection:
{
"_id" : ObjectId("58503934034b512b419a6eab"),
"website" : "https://www.google.com",
"name" : "Google",
"keywords" : [
"Search",
"Websites",
],
"tracking" : [
{
"_id" : ObjectId("5874aa1df63258286528598d"),
"position" : 0,
"created_at" : ISODate("2017-01-1T09:32:13.831Z"),
"real_url" : "https://www.google.com",
"keyword" : "Search"
},
{
"_id" : ObjectId("5874aa1ff63258286528598e"),
"keyword" : "Page",
"real_url" : "https://www.google.com",
"created_at" : ISODate("2017-01-1T09:32:15.832Z"),
"found_url" : "https://google.com/",
"position" : 3
},
{
"_id" : ObjectId("5874aa21f63258286528598f"),
"keyword" : "Search",
"real_url" : "https://www.foamymedia.com",
"created_at" : ISODate("2017-01-2T09:32:17.017Z"),
"found_url" : "https://google.com/",
"position" : 2
},
{
"_id" : ObjectId("5874aa21f63258286528532f"),
"keyword" : "Search",
"real_url" : "https://www.google.com",
"created_at" : ISODate("2017-01-2T09:32:17.017Z"),
"found_url" : "https://google.com/",
"position" : 1
},
]
}
What I want to do is group all of the keywords together and calculate the average for that particular day, over a certain period.
So let's say for example:
Between: 2017-01-01 to 2017-01-31 the following keywords was tracked:
2017-01-01:
'Seach' => 1,
'Page' => 3,
Average = 2
2017-01-02:
'Search' => 4,
'Page' => 6,
Average = 5
....
So in the end result, I would be finished with (in this case):
{
"_id" : ObjectId("5874dccb9cd90425e41b7c54"),
"website" : "www.google.com",
"averages" : [
"2",
"5"
]
}
You can try something like this.
$unwind the tracking array followed by $sort on tracking.keyword and tracking.created_at.$group by day to get average for day across all categories. Final $group to push all the day's average values into array for a website.
db.website.aggregate([{
$match: {
"_id": ObjectId("58503934034b512b419a6eab")
}
}, {
$lookup: {
from: "seo_tracking",
localField: "website",
foreignField: "real_url",
as: "tracking"
}
}, {
$unwind: "$tracking"
}, {
$sort: {
"tracking.keyword": 1,
"tracking.created_at": -1
}
}, {
$group: {
_id: {
$dayOfMonth: "$tracking.created_at"
},
"website": {
$first: "$website"
},
"website_id": {
$first: "$_id"
},
"averageByDay": {
$avg: "$tracking.position"
}
}
}, {
$group: {
"_id": "$website_id",
"website": {
$first: "$website"
},
"average": {
$push: "$averageByDay"
}
}
}]);
I have a problem of MongoDB's aggregate of timezone is UTC. I have looked for solutions from many other existing issues, but it is still not working. My code as follows:
MongoDB version : 2.2
Data
{ "_id" : ObjectId("52a3c9df46c6a9627eeb0337"), "Counting" : { "id" : "b1a93dfda46c47848f9862031300d24c", "group" : "Salary", "user_id" : "4d4ad2d37a464ad09d9aca2fee4c760c", "subGroup" : "e–ae3?", "bank_id" : "97e0fecc322b49b48c4eb3c8425fea77", "fee" : 646, "isIncome" : "true", "payment" : "", "consumeDate" : ISODate("2013-08-15T16:00:00Z"), "createDate" : ISODate("2013-12-08T01:22:39.008Z"), "bank_name" : "9edb6897-cdb8-4ce4-8f08-f5792cfa83d9" } }
{ "_id" : ObjectId("52a3c9df46c6a9627eeb0338"), "Counting" : { "id" : "33b341fc71314daebe851397c5cbaa40", "group" : "Salary", "user_id" : "cb9e06649cf943e5b368f6b05fc126c6", "subGroup" : "e–ae3?", "bank_id" : "e8da8cdae3ae495ca76f873fb3460b6d", "fee" : 647, "isIncome" : "true", "payment" : "", "consumeDate" : ISODate("2013-02-28T16:00:00Z"), "createDate" : ISODate("2013-12-08T01:22:39.016Z"), "bank_name" : "6913b48a-1a95-48c5-81f5-6920031358d7"} }
{ "_id" : ObjectId("52a3c9df46c6a9627eeb033a"), "Counting" : { "id" : "f0d41ed9f29f47e7b68a05c378cf939d", "group" : "Salary", "user_id" : "847cadbf55f84615af3ee63922446b54", "subGroup" : "e–ae3?", "bank_id" : "f45d62b5e62f4b7fa8172870cd992f19", "fee" : 623, "isIncome" : "true", "payment" : "", "consumeDate" : ISODate("2013-04-18T16:00:00Z"), "createDate" : ISODate("2013-12-08T01:22:39.152Z"), "bank_name" : "30dd169e-723e-4748-93cd-2d7a45b4a3b7"} }
db.Product.aggregate([{
"$group": {
"_id": {
"tyear": {
"$year": [{
"$add": ["$Counting.consumeDate", 28800000]
}]
},
"tMonth": {
"$month": [{
"$add": ["$Counting.consumeDate", 28800000]
}]
},
"tDate": {
"$dayOfMonth": [{
"$add": ["$Counting.consumeDate", 28800000]
}]
},
},
"count": {
"$sum": "$Counting.fee"
}
} }])
Error Message :
"errmsg" : "exception: $add does not support dates"
Reference
How to agregate by year-month-day on a different timezone
I'd recommend doing this in two-steps as a project then a group.
var millisecondsFromUTC = 8 * 60 * 60 * 1000; //PST is -8 hours from UTC
db.Product.aggregate([
{ $project : {
consumeDateLocal: {
$subtract : [ "$Counting.consumeDate", millisecondsFromUTC ]
},
fee: '$Counting.fee" } },
{ $group: {
_id: {
"tyear": { $year: "$consumeDateLocal" },
"tMonth": { "$month": "$consumeDateLocal" },
"tDate": { "$dayOfMonth": "consumeDateLocal" }
},
count: {
$sum: "$fee"
}
} } ], ...);
I do it like this.
millisecondsFromUTC = 8 * 3600 * 1000
Db.collection.aggreagte([
{$match: query},
{
$group: {
_id: {
$dateToString: {
format: "%Y-%m-%d",
date: {$add: ["$date", millisecondsFromUTC]}
},
click: {$sum: '$click'},
money: {$sum: {$divide: ['$money', 10000]}},
pv: {$sum: '$pv'},
req: {$sum: '$req'},
date: {$last: '$date'}
}
}]
I've got the following query that works fine. However adding a sort to the aggregation pipeline does not actually sort it. It does change the order of the results but does not properly sort.
Any idea why it refuses to sort the results by date?
db.calls.aggregate({
$match: {
'_id.date': {
$gte: ISODate('2014-04-13T00:00:00.000Z'),
$lte: ISODate('2014-04-20T00:00:00.000Z')
}
}
}, {
$project: {
calls: 1,
year: {
$year: '$_id.date'
},
month: {
$month: '$_id.date'
},
day: {
$dayOfMonth: '$_id.date'
}
}
}, {
$group: {
_id: {
name: "$_id.name",
day: '$day',
year: '$year',
month: '$month'
},
date: {
'$first': '$_id.date'
},
calls: {
'$sum': '$calls'
}
}
}, {
$group: {
_id: '$_id.name',
records: {
$addToSet: {
date: '$date',
calls: '$calls'
}
}
}
}, {
$sort: {
'records.date': 1
}
})
Outputs:
/* 0 */
{
"result" : [
{
"_id" : "a",
"records" : [
{
"date" : ISODate("2014-04-13T00:00:00.000Z"),
"calls" : 522
},
{
"date" : ISODate("2014-04-16T00:00:00.000Z"),
"calls" : 1523
},
{
"date" : ISODate("2014-04-20T00:00:00.000Z"),
"calls" : 57
},
{
"date" : ISODate("2014-04-14T00:00:00.000Z"),
"calls" : 1540
},
{
"date" : ISODate("2014-04-15T00:00:00.000Z"),
"calls" : 1592
},
{
"date" : ISODate("2014-04-17T00:00:00.000Z"),
"calls" : 1466
},
{
"date" : ISODate("2014-04-18T00:00:00.000Z"),
"calls" : 1003
},
{
"date" : ISODate("2014-04-19T00:00:00.000Z"),
"calls" : 623
}
]
},
{
"_id" : "b",
"records" : [
{
"date" : ISODate("2014-04-15T00:00:00.000Z"),
"calls" : 102
},
{
"date" : ISODate("2014-04-17T00:00:00.000Z"),
"calls" : 97
},
{
"date" : ISODate("2014-04-16T00:00:00.000Z"),
"calls" : 116
},
{
"date" : ISODate("2014-04-13T00:00:00.000Z"),
"calls" : 118
},
{
"date" : ISODate("2014-04-14T00:00:00.000Z"),
"calls" : 142
},
{
"date" : ISODate("2014-04-19T00:00:00.000Z"),
"calls" : 68
},
{
"date" : ISODate("2014-04-18T00:00:00.000Z"),
"calls" : 100
},
{
"date" : ISODate("2014-04-20T00:00:00.000Z"),
"calls" : 6
}
]
},
{
"_id" : "c",
"records" : [
{
"date" : ISODate("2014-04-17T00:00:00.000Z"),
"calls" : 137130
},
{
"date" : ISODate("2014-04-14T00:00:00.000Z"),
"calls" : 139497
},
{
"date" : ISODate("2014-04-13T00:00:00.000Z"),
"calls" : 92166
},
{
"date" : ISODate("2014-04-18T00:00:00.000Z"),
"calls" : 123129
},
{
"date" : ISODate("2014-04-15T00:00:00.000Z"),
"calls" : 146390
},
{
"date" : ISODate("2014-04-20T00:00:00.000Z"),
"calls" : 4515
},
{
"date" : ISODate("2014-04-16T00:00:00.000Z"),
"calls" : 141792
},
{
"date" : ISODate("2014-04-19T00:00:00.000Z"),
"calls" : 104847
}
]
}
],
"ok" : 1
}
Document structure:
{
"_id" : {
"date" : ISODate("2014-04-08T12:00:00.000Z"),
"name" : "a"
},
"calls" : 515
}
Fixed by doing a $sort before the last group and switching out $addToSet with $push so the order is preserved.
db.calls.aggregate({
$match: {
'_id.date': {
$gte: ISODate('2014-04-13T00:00:00.000Z'),
$lte: ISODate('2014-04-20T00:00:00.000Z')
}
}
}, {
$project: {
calls: 1,
year: {
$year: '$_id.date'
},
month: {
$month: '$_id.date'
},
day: {
$dayOfMonth: '$_id.date'
}
}
}, {
$group: {
_id: {
name: "$_id.name",
day: '$day',
year: '$year',
month: '$month'
},
date: {
'$first': '$_id.date'
},
calls: {
'$sum': '$calls'
}
}
}, {
$sort: {
date: 1
}
}, {
$group: {
_id: '$_id.name',
records: {
$push: {
date: '$date',
calls: '$calls'
}
}
}
})