Calculating day-of-week frequency with Mongo Aggregate - mongodb

I'm trying to calculate the frequency of site visits using a Mongo aggregate function, i.e., in a given week, on how many days did a user visit the site?
{ "_id" : ObjectId("5f7720caf2b93af8d566bc7c"), "email" : "blah#gmail.com", "timestamp" : ISODate("2020-09-29T17:59:00Z") }
{ "_id" : ObjectId("5f7720dcf2b93af8d566ffb7"), "email" : "blah#gmail.com", "timestamp" : ISODate("2020-09-30T01:01:00Z") }
{ "_id" : ObjectId("5f7721bbf2b93af8d56aadc4"), "email" : "yack#gmail.com", "timestamp" : ISODate("2020-10-01T09:58:00Z") }
{ "_id" : ObjectId("5f771e9ff2b93af8d55c57a9"), "email" : "yack#gmail.com", "timestamp" : ISODate("2020-09-26T04:12:00Z") }
{ "_id" : ObjectId("5f771e9ff2b93af8d55c5f6b"), "email" : "yack#gmail.com", "timestamp" : ISODate("2020-09-26T04:22:00Z") }
{ "_id" : ObjectId("5f771eeaf2b93af8d55dc45c"), "email" : "yack#gmail.com", "timestamp" : ISODate("2020-09-27T04:11:00Z") }
Output I'd like:
[
{ "_id": "blah#gmail.com", "dow" [ 2, 3 ], "visits": 2 }, // Visited Tuesday and Wednesday
{ "_id": "yack#gmail.com", "dow" [ 0, 1, 2, 3, 4, 5 ], "visits": 6 } // Visited Sunday through to Friday
]
I can get each email/dow pair as a record, but I'm not sure where to go from here...
[
{
$group: {
_id: { email: "$email", dow: { $dayOfWeek: "$timestamp" } },
}
}
]
Outputs:
{ "_id" : { "email" : "blah#gmail.com", "dow" : 2 } }
{ "_id" : { "email" : "blah#gmail.com", "dow" : 3 } }
{ "_id" : { "email" : "yack#gmail.com", "dow" : 0 } }
{ "_id" : { "email" : "yack#gmail.com", "dow" : 1 } }
{ "_id" : { "email" : "yack#gmail.com", "dow" : 2 } }
{ "_id" : { "email" : "yack#gmail.com", "dow" : 3 } }
{ "_id" : { "email" : "yack#gmail.com", "dow" : 4 } }
{ "_id" : { "email" : "yack#gmail.com", "dow" : 5 } }
Thanks!

You can use another $group statement:
db.collection.aggregate([
{
$group: {
_id: { email: "$email", dow: { $dayOfWeek: "$timestamp" } }
}
},
{
$group: {
_id: "$_id.email",
dow: { $push: "$_id.dow" },
visits: { $sum: 1 }
}
}
])
Mongo Playground

Related

How to compare two date fields in a document, one of them being embedded in an array?

I'm trying to compare two fields that both have $type = "date"; however, when I run the query I get 0 results.
My end goal is to output all documents with s.status =6 and s.time < r.
db.inventory.insertMany( [
{ item: "canvas","r" : {"$date" : "2019-05-23T00:00:00.000+0000"},"s" :[{ "status" : 1, "time" : { "$date" : "2019-05-23T23:03:10.000+0000" } }, { "status" : 2, "time" : { "$date" : "2019-05-23T23:05:03.000+0000" } }, { "status" : 4, "time" : { "$date" : "2019-05-23T23:05:06.000+0000" } }, { "status" : 5, "time" : { "$date" : "2019-05-23T23:05:07.000+0000" } }, { "status" : 6, "time" : { "$date" : "2019-05-23T23:05:09.000+0000" } } ]},
{ item: "paper","r" : { "$date" : "2019-05-24T06:00:00.000+0000" }, "s" : [ { "status" : 1, "time" : { "$date" : "2019-05-23T23:03:10.000+0000" } }, { "status" : 2, "time" : { "$date" : "2019-05-23T23:05:03.000+0000" } }, { "status" : 3, "time" : { "$date" : "2019-05-23T23:05:06.000+0000" } }, { "status" : 4, "time" : { "$date" : "2019-05-25T05:05:07.000+0000" } }, { "status" : 6, "time" : { "$date" : "2019-05-25T05:55:09.000+0000" } } ] },
{ item: "stone","r" : { "$date" : "2019-05-23T05:00:00.000+0000" }, "s" : [ { "status" : 1, "time" : { "$date" : "2019-05-23T23:03:10.000+0000" } }, { "status" : 2, "time" : { "$date" : "2019-05-23T23:05:03.000+0000" } } ] },
{ item: "mass","r" : {"$date" : "2019-05-24T06:00:00.000+0000"},"s" :[ { "status" : 1, "time" : { "$date" : "2019-05-23T23:03:10.000+0000" } }, { "status" : 2, "time" : { "$date" : "2019-05-23T23:05:03.000+0000" } }, { "status" : 4, "time" : { "$date" : "2019-05-23T23:05:06.000+0000" } }, { "status" : 5, "time" : { "$date" : "2019-05-23T23:05:07.000+0000" } }, { "status" : 6, "time" : { "$date" : "2019-05-24T06:06:09.000+0000" } } ] },
{item : "paper","r" : {"$date" : "2019-05-24T06:00:00.000+0000" },"s" :[ { "status" : 1, "time" : { "$date" : "2019-05-23T23:03:10.000+0000" } }, { "status" : 2, "time" : { "$date" : "2019-05-23T23:05:03.000+0000" } }, { "status" : 4, "time" : { "$date" : "2019-05-23T23:05:06.000+0000" } }, { "status" : 5, "time" : { "$date" : "2019-05-23T23:05:07.000+0000" } }, { "status" : 6, "time" : { "$date" : "2019-05-24T06:06:09.000+0000" } } ] }
]);
What I've tried:
db.inventory.find({"s": {"$elemMatch" : {"status" : 6,"time" : { "$lt" : ISODate(this.r)}}});
I'm getting no error, but also 0 results.
So far, the $elemMatch operator does not support comparing against another field of the same document.
The following query can get us the expected output:
db.inventory.find({
$expr:{
$gt:[
{
$size:{
$filter:{
"input":"$s",
"as":"doc",
"cond":{
$and:[
{
$eq:["$$doc.status",6]
},
{
$lt:["$$doc.time","$r"]
}
]
}
}
}
},
0
]
}
}).pretty()

mongodb help required for one business problem

Business Problem:
We are seeing a single customerOrderNumber with all versions having “INACTIVE.” This is a problem for multiple reasons. My goal is to be able to pull a list of customerOrderNumbers with ONLY INACTIVE statuses.
Database and Query: XXX_ORDERMGMT_1
db.getCollection('customerOrder').aggregate( [ { $match: { 'orderDocument.accountInfo.ban': '123456' } }, { $group: { _id: { customerOrderNumber : '$orderReference.customerOrderNumber', status : '$orderReference.customerOrderStatus' }, count: { $sum: 1 } }, }] )
OUTPUT:
/* 1 / { "_id" : { "customerOrderNumber" : "123", "status" : "COMPLETED" }, "count" : 1.0 } / 2 */ { "_id" : { "customerOrderNumber" : "123", "status" : "INACTIVE" }, "count" : 2.0 }
DESIRED_OUTPUT:
/* 1 */ { "_id" : { "customerOrderNumber" : "123", "statusGroupings" : { "status" : "COMPLETED", "status_cnt" : 1.0 }, { "status" : "INACTIVE", "status_cnt" : 2.0 } }, "count" : 3.0 }
( My approach was to pull all customerOrders by status and count, parse it into a relational format, and filter by only customerOrderNumbers with all versions being INACTIVE. This may not be the best way and I’m open to thoughts.)
The following query can get us the expected output:
db.getCollection("customerOrder").aggregate([
{
$match:{
"orderDocument.accountInfo.ban":"123456"
}
},
{
$group:{
"_id":{
"customerOrderNumber":"$orderReference.customerOrderNumber",
"status":"$orderReference.customerOrderStatus"
},
"customerOrderNumber":{
$first:"$orderReference.customerOrderNumber"
},
"status":{
$first:"$orderReference.customerOrderStatus"
},
"count":{
$sum:1
}
}
},
{
$group:{
"_id":"$customerOrderNumber",
"customerOrderNumber":{
$first:"$customerOrderNumber"
},
"statusGroupings":{
$push:{
"status":"$status",
"status_cnt":"$count"
}
},
"count":{
$sum:"$count"
}
}
},
{
$project:{
"_id":0
}
}
]).pretty()
Data set:
{
"_id" : ObjectId("5d9bf7204ed5d873f39a773b"),
"orderDocument" : {
"accountInfo" : {
"ban" : "123456"
}
},
"orderReference" : {
"customerOrderNumber" : 1,
"customerOrderStatus" : "INACTIVE"
}
}
{
"_id" : ObjectId("5d9bf7204ed5d873f39a773c"),
"orderDocument" : {
"accountInfo" : {
"ban" : "123456"
}
},
"orderReference" : {
"customerOrderNumber" : 1,
"customerOrderStatus" : "COMPLETED"
}
}
{
"_id" : ObjectId("5d9bf7204ed5d873f39a773d"),
"orderDocument" : {
"accountInfo" : {
"ban" : "123456"
}
},
"orderReference" : {
"customerOrderNumber" : 1,
"customerOrderStatus" : "INACTIVE"
}
}
{
"_id" : ObjectId("5d9bf7204ed5d873f39a773e"),
"orderDocument" : {
"accountInfo" : {
"ban" : "123456"
}
},
"orderReference" : {
"customerOrderNumber" : 1,
"customerOrderStatus" : "INACTIVE"
}
}
Output:
{
"customerOrderNumber" : 1,
"statusGroupings" : [
{
"status" : "COMPLETED",
"status_cnt" : 1
},
{
"status" : "INACTIVE",
"status_cnt" : 3
}
],
"count" : 4
}

Multiple condition on same column mongodb

{
"_id" : ObjectId("5ccfe06e2434de5c345d058e"),
"user_id" : 1,
"event" : "eventA",
"channel_id" : "1098",
"channel_node_id" : "2177",
"channel_name" : "New work",
"created_at" : NumberLong("1557127278000"),
"updated_at" : NumberLong("1557127278000"),
"user_raw_data" : {
"Name" : "akhilesh",
"Mobile" : "1234567890",
"Email" : "akhilesh#test.com"
},
{
"_id" : ObjectId("5ccfe06e2434de5c345d058e"),
"user_id" : 1
"event" : "eventB,
"channel_id" : "1098",
"channel_node_id" : "2177",
"channel_name" : "New work",
"created_at" : NumberLong("1557127278000"),
"updated_at" : NumberLong("1557127278000"),
"user_raw_data" : {
"Name" : "akhilesh",
"Mobile" : "1234567890",
"Email" : "akhilesh#test.com"
},
{
"_id" : ObjectId("5ccfe06e2434de5c345d058e"),
"user_id" : 1
"event" : "eventC,
"channel_id" : "1098",
"channel_node_id" : "2178",
"channel_name" : "New work",
"created_at" : NumberLong("1557127278000"),
"updated_at" : NumberLong("1557127278000"),
"user_raw_data" : {
"Name" : "akhilesh",
"Mobile" : "1234567890",
"Email" : "akhilesh#test.com"
}
,
{
"_id" : ObjectId("5ccfe06e2434de5c345d058e"),
"user_id" : 2
"event" : "eventA,
"channel_id" : "1098",
"channel_node_id" : "2178",
"channel_name" : "New work",
"created_at" : NumberLong("1557127278000"),
"updated_at" : NumberLong("1557127278000"),
"user_raw_data" : {
"Name" : "akhilesh",
"Mobile" : "1234567890",
"Email" : "akhilesh#test.com"
}
Now I want to get the count of events performed by the users so far. For eventA the count will be 2, for eventB it will be 1, and for eventC it will be 1. But this query will have multiple conditions: the count should be taken only if the user has performed (eventA or eventB) and eventC. So in that case, from the above docs, the events of user id 2 will not be considered because that user has not performed (eventA or eventB).
Also, along with matching events using multiple and/or conditions, I also want to apply a filter on user_raw_data, so my query should be like this:
db.web_channel_events.aggregate([
{
$match: {
"channel_id": "1098",
"channel_node_id": "2177"
}
},
{
$group: {
"_id": {
"user_id": "$user_id",
"event": "$event"
},
"count": {
$sum: 1
}
}
},
{
$group: {
"_id": "$_id.user_id",
"event_details": {
$push: {
"k": "$_id.event",
"v": "$count"
}
}
}
},
{
$match: {
$and: [
{
$or: [
{
"event_details.k": "eventA",
"event_details.v": {
"$gte": 1
}
},
{
"event_details.k": "eventB",
"event_details.v": {
"$gte": 1
}
}
]
},
{
"event_details.k": "eventC",
"event_details.v": {
"$gte": 1
}
},
{
"user_raw_data.Name": "akhilesh"
}
]
}
},
{
"$unwind": "$event_details"
},
{
$group: {
"_id": "$event_details.k",
"count": {
$sum: "$event_details.v"
}
}
}
]).pretty();
The following query can get us the expected output:
db.collection.aggregate([
{
$group:{
"_id":{
"user_id":"$user_id",
"event":"$event"
},
"user_id":{
$first:"$user_id"
},
"event":{
$first:"$event"
},
"count":{
$sum:1
}
}
},
{
$group:{
"_id":"$user_id",
"user_id":{
$first:"$user_id"
},
"event_details":{
$push:{
"k":"$event",
"v":"$count"
}
}
}
},
{
$addFields:{
"event_details":{
$arrayToObject:"$event_details"
}
}
},
{
$match:{
$and:[
{
$or:[
{
"event_details.eventA":{
$gt:0
}
},
{
"event_details.eventB":{
$gt:0
}
}
]
},
{
"event_details.eventC":{
$gt:0
}
}
]
}
},
{
$group:{
"_id":null,
"eventA":{
$sum:"$event_details.eventA"
},
"eventB":{
$sum:"$event_details.eventB"
},
"eventC":{
$sum:"$event_details.eventC"
}
}
},
{
$project:{
"_id":0,
"event_details.eventA":"$eventA",
"event_details.eventB":"$eventB",
"event_details.eventC":"$eventC"
}
}
]).pretty()
Data set:
{
"_id" : ObjectId("5ccfe06e2434de5c345d0588"),
"event" : "eventA",
"user_id" : 1,
"created_at" : NumberLong("1557127278000"),
"updated_at" : NumberLong("1557127278000")
}
{
"_id" : ObjectId("5d8b132388edcf85b0aaa609"),
"event" : "eventA",
"user_id" : 1,
"created_at" : NumberLong("1557127278000"),
"updated_at" : NumberLong("1557127278000")
}
{
"_id" : ObjectId("5d8b132388edcf85b0aaa60a"),
"event" : "eventB",
"user_id" : 1,
"created_at" : NumberLong("1557127278000"),
"updated_at" : NumberLong("1557127278000")
}
{
"_id" : ObjectId("5d8b132388edcf85b0aaa60b"),
"event" : "eventC",
"user_id" : 1,
"created_at" : NumberLong("1557127278000"),
"updated_at" : NumberLong("1557127278000")
}
{
"_id" : ObjectId("5d8b132388edcf85b0aaa60c"),
"event" : "eventC",
"user_id" : 1,
"created_at" : NumberLong("1557127278000"),
"updated_at" : NumberLong("1557127278000")
}
{
"_id" : ObjectId("5d8b132388edcf85b0aaa60d"),
"event" : "eventC",
"user_id" : 2,
"created_at" : NumberLong("1557127278000"),
"updated_at" : NumberLong("1557127278000")
}
Output:
{ "event_details" : { "eventA" : 2, "eventB" : 1, "eventC" : 2 } }
Query analysis:
Grouping the data on the basis of user_id and event and calculating the count of that specific incident
Grouping only on the basis of user_id and pushing event and its
count into an array event_details as key-value pairs
Converting the event_details into an object
Applying the condition that the event count for ( A or B ) and C
should be greater than 0
Sum up individual event counts

MongoDB count distinct items

I have the following query on a list with these fields: key, time, p, email
use app_db;
db.getCollection("app_log").aggregate(
[
{
"$match" : {
"key" : "login"
}
},
{
"$group" : {
"_id" : {
"$substr" : [
"$time",
0.0,
10.0
]
},
"total" : {
"$sum" : "$p"
},
"count" : {
"$sum" : 1.0
}
}
}
]
);
and the output is something like this :
{
"_id" : "2019-08-25",
"total" : NumberInt(623),
"count" : 400.0
}
{
"_id" : "2019-08-24",
"total" : NumberInt(2195),
"count" : 1963.0
}
{
"_id" : "2019-08-23",
"total" : NumberInt(1294),
"count" : 1706.0
}
{
"_id" : "2019-08-22",
"total" : NumberInt(53),
"count" : 1302.0
}
But I need the count to be distinct on the email field, i.e. the number of distinct email addresses that logged in per day and whose p value is greater than 0.
You need $addToSet to get an array of unique email values per day and then you can use $size to count the number of items in that array:
db.getCollection("app_log").aggregate(
[
{
"$match" : {
"key" : "login"
}
},
{
"$group" : {
"_id" : {
"$substr" : [
"$time",
0.0,
10.0
]
},
"total" : {
"$sum" : "$p"
},
"emails" : {
"$addToSet": "$email"
}
}
},
{
$project: {
_id: 1,
total: 1,
countDistinct: { $size: "$emails" }
}
}
]
);

How to count from two fields in mongoDB

{
"_id" : ObjectId("56bd8e9de517259412a743ab"),
"user_token" : "mzXhdbCu",
"sender_details" : {
"location" : "XYZ",
"zipcode" : "610208"
},
"shipping_address" : {
"location" : "ABC",
"zipcode" : "602578
}
}
I have been trying to count the number of instances of each unique zipcode from both
$sender_details.zipcode
and
$shipping_address.zipcode
I tried to use the following code
db.ac_consignments.aggregate({
$group: {
_id: {
"zipcode":"$sender_details.zipcode",
"szipcode":"$shipping_address.zipcode"
},
count: {"$sum":1}
}
})
The output I receive is this
{
"result" : [
{
"_id" : {
"zipcode" : "610208",
"szipcode" : "602578"
},
"count" : 7
},
{
"_id" : {
"zipcode" : "602578",
"szipcode" : "678705"
},
"count" : 51
}
],
"ok" : 1
}
But what I require is the total count of each zipcode across both $sender_details.zipcode and $shipping_address.zipcode. So an output like this:
{
"result" : [
{
"_id" : {
"zipcode" : "610208",
},
"count" : 7
},
{
"_id" : {
"zipcode" : "602578"
},
"count" : 51
}
{
"_id" : {
"zipcode" : "678705"
},
"count" : 51
}
],
"ok" : 1
}
The following pipeline should work for you
db.getCollection('ac_consignments').aggregate([
{
$project: {
zipcode: [ "$sender_details.zipcode", "$shipping_address.zipcode" ]
}
},
{
$unwind: "$zipcode"
},
{
$group: {
_id: "$zipcode",
count: { $sum: 1 }
}
}
])
which produces output like this
/* 1 */
{
"_id" : "610208",
"count" : 1.0
}
/* 2 */
{
"_id" : "610209",
"count" : 2.0
}
/* 3 */
{
"_id" : "602578",
"count" : 1.0
}
/* 4 */
{
"_id" : "602579",
"count" : 2.0
}
when using the following as sample data
/* 1 */
{
"_id" : ObjectId("56bd8e9de517259412a743ab"),
"user_token" : "mzXhdbCu",
"sender_details" : {
"location" : "XYZ",
"zipcode" : "610208"
},
"shipping_address" : {
"location" : "ABC",
"zipcode" : "602578"
}
}
/* 2 */
{
"_id" : ObjectId("56bd8e9de517259412a743ac"),
"user_token" : "mzXhdbCu",
"sender_details" : {
"location" : "XYZ",
"zipcode" : "610209"
},
"shipping_address" : {
"location" : "ABC",
"zipcode" : "602579"
}
}
/* 3 */
{
"_id" : ObjectId("56bd8e9de517259412a753ac"),
"user_token" : "mzXhdbCu",
"sender_details" : {
"location" : "XYZ",
"zipcode" : "610209"
},
"shipping_address" : {
"location" : "ABC",
"zipcode" : "602579"
}
}
See the following GIF
Update for older versions
db.getCollection('ac_consignments').aggregate([
{
$project: {
sender_zip: "$sender_details.zipcode",
shipping_zip: "$shipping_address.zipcode",
party: { $literal: ["sender_zip", "shipping_zip"] }
}
},
{
$unwind: "$party"
},
{
$group: {
_id: "$_id",
zipcode: {
$push: {
$cond: [
{ $eq: ["$party", "sender_zip"] },
"$sender_zip",
"$shipping_zip"
]
}
}
}
},
{
$unwind: "$zipcode"
},
{
$group: {
_id: "$zipcode",
count: { $sum: 1 }
}
}
])