MongoDB - Get sales per hour

Good people! I am in need of your help.
I am trying to create a line graph using apexcharts with data imported from MongoDB.
I am trying to graph hourly sales, so I need the number of sales for each hour of the day.
Here is an example MongoDB document:
{
"_id" : ObjectId("5dbee4eed6f04aaf191abc59"),
"seller_id" : "5aa1c2c35ef7a4e97b5e995a",
"temp" : "4.3",
"sale_type" : "coins",
"createdAt" : ISODate("2020-05-10T00:10:00.000Z"),
"updatedAt" : ISODate("2019-11-10T14:32:14.650Z")
}
Up to now I have a query like this:
db.getCollection('sales').aggregate([
  { "$facet": {
      "00:00": [
        { "$match": {
            createdAt: { $gte: ISODate("2020-05-10T00:00:00.000Z"), $lt: ISODate("2020-05-10T00:59:00.001Z") },
            seller_id: "5aa1c2c35ef7a4e97b5e995a"
        }},
        { "$count": "sales" }
      ],
      "01:00": [
        { "$match": {
            createdAt: { $gte: ISODate("2020-05-10T01:00:00.000Z"), $lt: ISODate("2020-05-10T01:59:00.001Z") },
            seller_id: "5aa1c2c35ef7a4e97b5e995a"
        }},
        { "$count": "sales" }
      ],
      "02:00": [
        { "$match": {
            createdAt: { $gte: ISODate("2020-05-10T02:00:00.000Z"), $lt: ISODate("2020-05-10T02:59:00.001Z") },
            seller_id: "5aa1c2c35ef7a4e97b5e995a"
        }},
        { "$count": "sales" }
      ],
      "03:00": [
        { "$match": {
            createdAt: { $gte: ISODate("2020-05-10T03:00:00.000Z"), $lt: ISODate("2020-05-10T03:59:00.001Z") },
            seller_id: "5aa1c2c35ef7a4e97b5e995a"
        }},
        { "$count": "sales" }
      ]
  }},
  { "$project": {
      "ventas0": { "$arrayElemAt": ["$01:00.sales", 0] },
      "ventas1": { "$arrayElemAt": ["$02:00.sales", 0] },
      "ventas3": { "$arrayElemAt": ["$03:00.sales", 0] }
  }}
])
But I am sure there is a more efficient way to do this.
My expected output looks like this:
[countsale(00:00),countsale(01:00),countsale(02:00),countsale(03:00), etc to 24 hs]

You are correct, there is a more efficient way to do this: use the date expression operators, specifically grouping with $hour.
db.getCollection('sales').aggregate([
  {
    $match: {
      createdAt: { $gte: ISODate("2020-05-10T00:00:00.000Z"), $lt: ISODate("2020-05-11T00:00:00.001Z") }
    }
  },
  {
    $group: {
      _id: { $hour: "$createdAt" },
      count: { $sum: 1 }
    }
  },
  {
    $sort: { _id: 1 }
  }
]);
This will give you this result:
[
{
_id: 0,
count: x
},
{
_id: 1,
count: y
},
...
{
_id: 23,
count: z
}
]
From here you can restructure the data easily as you wish.
A problem I foresee is that hours without any matches (i.e. count = 0) will not exist in the result set; you'll have to fill in those gaps manually.
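For example, one way to get the flat 24-element array from your expected output and fill those gaps with zeros is to post-process the cursor in the shell. A minimal sketch, reusing the aggregation above (the seller_id filter is added back from your original query, and the salesPerHour variable name is just for illustration):
// Run the hourly aggregation and collect the {_id: hour, count: n} documents
var byHour = db.getCollection('sales').aggregate([
  { $match: {
      createdAt: { $gte: ISODate("2020-05-10T00:00:00.000Z"), $lt: ISODate("2020-05-11T00:00:00.000Z") },
      seller_id: "5aa1c2c35ef7a4e97b5e995a"
  }},
  { $group: { _id: { $hour: "$createdAt" }, count: { $sum: 1 } } }
]).toArray();

// Start from 24 zeros, then overwrite the hours that actually had sales
var salesPerHour = [];
for (var h = 0; h < 24; h++) { salesPerHour.push(0); }
byHour.forEach(function (doc) { salesPerHour[doc._id] = doc.count; });
// salesPerHour now looks like [countsale(00:00), countsale(01:00), ..., countsale(23:00)]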

Related

Group different fields by quarter

I've got an aggregation:
{
$group: {
_id: "$_id",
cuid: {$first: "$cuid"},
uniqueConnexion: {
$addToSet: "$uniqueConnexion"
},
uniqueFundraisings: {
$addToSet: "$uniqueFundraisings"
}
}
},
which results in:
{
"cuid" : "cjcqe7qdo00nl0ltitkxdw8r6",
"uniqueConnexion" : [
"09.2019",
"06.2019",
"07.2019",
"08.2019",
"05.2019"
],
"uniqueFundraisings" : [
"06.2019",
"02.2019",
"01.2019",
"03.2019",
"09.2018",
"10.2018"
],
}
And now I want to group the uniqueConnexion and uniqueFundraisings fields into a new field (named uniqueAction) and convert them to a quarter format.
So an output like this:
{
"cuid" : "cjcqe7qdo00nl0ltitkxdw8r6",
"uniqueAction" : [
"Q4-2018",
"Q1-2019",
"Q2-2019",
"Q3-2014",
],
}
The previous answer shows the power of $setUnion operating on two lists. I have taken that and expanded it a little more to get to the OP's target state. Given an input that more clearly shows some quarterly grouping (hint!):
var r =
{
"cuid" : "cjcqe7qdo00nl0ltitkxdw8r6",
"uniqueConnexion" : [
"01.2018",
"02.2018",
"08.2018",
"09.2018",
"10.2018",
"11.2018"
],
"uniqueFundraisings" : [
"01.2018",
"02.2018",
"05.2018",
"06.2018",
"12.2018"
],
};
this agg:
db.foo.aggregate([
// Unique-ify the two lists:
{ $project: {
cuid:1,
X: { $setUnion: [ "$uniqueConnexion", "$uniqueFundraisings" ] }
}}
// Now need to get to quarters....
// The input date is "MM.YYYY". Need to turn it into "Qn-YYYY":
,{ $project: {
X: {$map: {
input: "$X",
as: "z",
in: {$let: {
vars: { q: {$toInt: {$substr: ["$$z",0,2] }}},
in: {$concat: [{$cond: [
{$lte: ["$$q", 3]}, "Q1", {$cond: [
{$lte: ["$$q", 6]}, "Q2", {$cond: [
{$lte: ["$$q", 9]}, "Q3", "Q4"] }
]}
]} ,
"-", {$substr:["$$z",3,4]},
]}
}}}}}}
,{ $unwind: "$X"}
,{ $group: {_id: "$X", n: {$sum:1} }}
]);
produces this output. Yes, the OP was not looking for the count of things appearing in each quarter but very often that quickly follows on the heels of the original ask.
{ "_id" : "Q4-2018", "n" : 3 }
{ "_id" : "Q3-2018", "n" : 2 }
{ "_id" : "Q2-2018", "n" : 2 }
{ "_id" : "Q1-2018", "n" : 2 }
I think this will help you:
{ $project: {
    cuid: 1,
    uniqueAction: { $setUnion: [ "$uniqueConnexion", "$uniqueFundraisings" ] },
    _id: 0
}}

Count documents by the average of their fields in MongoDB

Our students' scores in 4 fields are modeled like this:
{
"_id" : xxx,
"student" : "Private Ryan",
"math" : 9,
"history" : 8,
"literature" : 6,
"science" : 10
}
The task is to count how many good/average/bad performing students there are. Given:
Good: having an average >= 8 points.
Bad: having an average score < 5.
If possible, bucketing them would be nice too.
You can use $addFields and $let to define a "label" for every student. To apply the conditional logic you can take advantage of $switch or a double $cond. Then you need to run $group to count them, and you can also use $push to get the entire documents into the final result:
db.collection.aggregate([
{
$addFields: {
label: {
$let: {
vars: {
avg: {
$divide: [ { $sum: [ "$math", "$history", "$literature", "$science" ] }, 4 ]
}
},
in: {
$cond: [
{ $gte: [ "$$avg", 8 ] },
"good",
{ $cond: [ { $lt: [ "$$avg", 5 ] }, "bad", "average" ] }
]
}
}
}
}
},
{
$group: {
_id: "$label",
count: { $sum: 1 },
students: { $push: "$$ROOT" }
}
}
])
Mongo Playground
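Since the question also mentions bucketing, a variant using $bucket (available since MongoDB 3.4) can compute the buckets directly from the same average; a sketch, with the boundaries chosen from the good/bad thresholds above (the avgScore field name is just for illustration):
db.collection.aggregate([
  { $addFields: { avgScore: { $avg: [ "$math", "$history", "$literature", "$science" ] } } },
  { $bucket: {
      groupBy: "$avgScore",
      boundaries: [ 0, 5, 8, 11 ],   // [0,5) = bad, [5,8) = average, [8,11) = good
      default: "other",              // anything outside the boundaries
      output: {
        count: { $sum: 1 },
        students: { $push: "$student" }
      }
  } }
])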

MongoDB aggregation: average sales per hour

I have a collection with sales. Now I need to get the average number of sales per hour within a date range.
Up to now I have a query like this:
db.getCollection('sales').aggregate({
"$match": {
$and: [
{ "createdAt": { $gte: ISODate("2018-05-01T00:00:00.000Z") } },
{ "createdAt": { $lt: ISODate("2018-10-30T23:59:00.000Z") } },
]
}
},{
"$project": {
"h":{"$hour":"$createdAt"},
}
},{
"$group":{
"_id": "$h",
"salesPerHour": { $sum: 1 },
},
},{
"$sort": { "salesPerHour": -1 }
});
The result looks like this: {"_id" : 15, "salesPerHour" : 681.0}
How can I get the average value of salesPerHour instead of the sum?
Update 1 => Example document.
{
"_id" : "pX6jj7j4274J9xpSA",
"idFiscalSale" : "48",
"documentYear" : "2018",
"paymentType" : "cash",
"cashReceived" : 54,
"items" : [...],
"customer" : null,
"subTotal" : 23.89,
"taxTotal" : 3.7139,
"total" : 23.89,
"rewardPointsValue" : 0,
"rewardPointsEarned" : 24,
"discountValue" : 0,
"createdAt" : ISODate("2018-04-24T00:00:00.201Z")
}
You can use the aggregation query below.
db.sales.aggregate([
{"$match":{
"createdAt":{
"$gte":ISODate("2018-05-01T00:00:00.000Z"),
"$lt":ISODate("2018-10-30T23:59:00.000Z")
}
}},
{"$group":{
"_id":{"$hour":"$createdAt"},
"salesPerHour":{"$sum":1}
}},
{"$group":{
"_id":null,
"salesPerHour":{"$avg":"$salesPerHour"}
}}
])
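Note that this produces one overall average across the hour buckets. If what you want instead is the average for each hour of the day across all days in the range, a variant (a sketch, not from the original answer) counts per calendar day and hour first, then averages per hour:
db.sales.aggregate([
  { $match: {
      createdAt: { $gte: ISODate("2018-05-01T00:00:00.000Z"),
                   $lt:  ISODate("2018-10-30T23:59:00.000Z") }
  }},
  // Count sales separately for every calendar day and hour
  { $group: {
      _id: { day:  { $dateToString: { format: "%Y-%m-%d", date: "$createdAt" } },
             hour: { $hour: "$createdAt" } },
      count: { $sum: 1 }
  }},
  // Average those per-day counts for each hour of the day
  { $group: { _id: "$_id.hour", salesPerHour: { $avg: "$count" } } },
  { $sort: { _id: 1 } }
])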
You can try the aggregation below.
You have to use the $avg aggregation operator with the salesPerHour field:
db.collection.aggregate([
{ "$match": {
"$and": [
{ "createdAt": { "$gte": ISODate("2018-05-01T00:00:00.000Z") }},
{ "createdAt": { "$lt": ISODate("2018-10-30T23:59:00.000Z") }}
]
}},
{ "$group": {
"_id": { "$hour": "$createdAt" },
"salesPerHour": {
"$avg": "$salesPerHour"
}
}}
])

How to use MongoDB to aggregate Reports for time periods within multiple days?

I have pre-aggregated reports following the MongoDB docs, see:
https://docs.mongodb.org/ecosystem/use-cases/pre-aggregated-reports/#one-document-per-page-per-day
{
_id: "20101010/site-1/apache_pb.gif",
metadata: {
date: ISODate("2000-10-10T00:00:00Z"),
site: "site-1",
page: "/apache_pb.gif" },
daily: 5468426,
hourly: {
"0": 227850,
"1": 210231,
...
"23": 20457 },
minute: {
"0": 3612,
"1": 3241,
...
"1439": 2819 }
}
I aggregate the views for x days back like this:
db.collection.aggregate(
{ $match : { date: { $gte : ISODate("2016-04-07T00:00:00.000Z")}}},
{ $group: {_id: "_id", views: {$sum: "$daily"}}},
{ $sort: {views: -1}}
)
Now I want the views of the last x hours. The problem here is the date line. As long as I'm within the same day my solution works fine, but what is the best way to handle crossing the date line?
Here is what I have:
db.collection.aggregate(
{ $match : { date: { $eq : ISODate("2016-04-07T00:00:00.000Z")}}},
{ $group: {_id: "$_id", views: { $sum: { $add: [ '$hourly.1', '$hourly.2', ... ] }} }},
{ $sort: {views: -1}}
)
After some research, here is my answer:
db.collection.aggregate([
{ $match : { date : { $gte : ISODate("2016-04-20T00:00:00.000Z")}}},
{ $project : {
_id: "$_id",
views :
{
$cond : [
{ $eq : [ "$date", ISODate("2016-04-20T00:00:00.000Z") ] },
{ $add: [ { $ifNull : ['$hourly.22' , 0 ] } , { $ifNull : ['$hourly.23' , 0 ] } ] },
{ $add: [ { $ifNull : ['$hourly.0' , 0 ] } , { $ifNull : ['$hourly.1' , 0 ] } ] },
]
}
}
},
{ $group: { "_id" : "$_id", views: { $sum: { $add: [ "$views" ] } } }},
{ $sort: { views: -1}}
])
The $cond operator did the trick for me, so I could distinguish the datasets for the different dates.
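If the window needs to be parameterised rather than hard-coded, the $add lists can also be built in the shell before running the pipeline. A rough sketch reusing the same $cond / $ifNull pattern (the dates, hour lists, and variable names are hypothetical, just for illustration):
// A window covering 22:00-23:59 of the previous day plus 00:00-01:59 of the current day
var yesterday = ISODate("2016-04-20T00:00:00.000Z");
var today     = ISODate("2016-04-21T00:00:00.000Z");

var yesterdayHours = [22, 23].map(function (h) { return { $ifNull: [ "$hourly." + h, 0 ] }; });
var todayHours     = [0, 1].map(function (h) { return { $ifNull: [ "$hourly." + h, 0 ] }; });

db.collection.aggregate([
  { $match: { date: { $in: [ yesterday, today ] } } },
  { $project: {
      views: { $cond: [ { $eq: [ "$date", yesterday ] },
                        { $add: yesterdayHours },
                        { $add: todayHours } ] }
  }},
  { $group: { _id: "$_id", views: { $sum: "$views" } } },
  { $sort: { views: -1 } }
]);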

How to simplify this Aggregation Framework Query (with Date Formatting & Comparisons)

I already have a working query for what I need (included below), but I can't help but feel that there must be a better way to accomplish this. My requirements are fairly simple, but the resulting query itself is the definition of eye-bleed code.
Here's a sample document that we're iterating over (with irrelevant properties removed):
> db.Thing.find().limit(1).pretty()
{
"_id": ObjectId(...),
"created": ISODate(...),
"updated": ISODate(...)
}
My requirements for the query are:
Only match on Things where created > updated.
Group on the YYYY-MM value of the created field, and reduce to a count.
Output should look like the following:
{ "count": 93592, "month": "2014-06" },
{ "count": 81629, "month": "2014-07" },
{ "count": 126183, "month": "2014-08" },
...
Again, this feels like it should be really simple. Here's my correctly functioning query that currently does this:
db.Thing.aggregate([
{ $project: {
cmpDates: { $cmp: ['$created', '$updated'] },
created: '$created'
}},
{ $match: {
cmpDates: { $ne: 0 }
}},
{ $project: {
month: {
$concat: [
{ $substr: [ { $year: '$created' }, 0, 4 ] },
'-',
{ $cond: [
{ $lte: [ { $month: '$created' }, 9 ] },
{ $concat: [
'0',
{ $substr: [ { $month: '$created' }, 0, 2 ] }
]},
{ $substr: [ { $month: '$created' }, 0, 2 ] }
] }
]
},
_id: 0
}},
{ $group: {
_id: '$month',
count: { $sum: 1 }
}},
{ $project: {
month: '$_id',
count: 1,
_id: 0
}},
{ $sort: { month: 1 } }
]);
My question: Can this query be simplified, and if so, how?
Thanks!
Try this:
db.test.aggregate([
{ "$project" : {
"cmpDates" : { "$cmp" : ["$created", "$updated"] },
"createdYear" : { "$year" : "$created" },
"createdMonth" : { "$month" : "$created" }
} },
{ "$match" : { "cmpDates" { "$ne" : 0 } } },
{ "$group" : {
"_id" : { "y" : "$createdYear", "m" : "$createdMonth" },
"count" : { "$sum" : 1 }
} }
])
The big difference is that I used a compound key for the group, so I'm grouping the pair (year, month) instead of constructing a string value YYYY-MM to accomplish the same purpose.
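For what it's worth, on newer servers this can collapse even further: $dateToString (MongoDB 3.0+) builds the "YYYY-MM" key in a single expression, and $expr in $match (MongoDB 3.6+) can compare the two date fields without the extra $project/$cmp step. A sketch assuming those versions are available; note it matches created > updated as the requirements state, whereas the queries above match created != updated:
db.Thing.aggregate([
  { $match: { $expr: { $gt: [ "$created", "$updated" ] } } },
  { $group: {
      _id: { $dateToString: { format: "%Y-%m", date: "$created" } },
      count: { $sum: 1 }
  }},
  { $project: { _id: 0, month: "$_id", count: 1 } },
  { $sort: { month: 1 } }
]);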