mongodb help required for one business problem - mongodb

Business Problem:
We are seeing individual customerOrderNumbers for which every version has the status “INACTIVE”. This is a problem for multiple reasons. My goal is to pull a list of the customerOrderNumbers whose versions have ONLY INACTIVE statuses.
Database and Query: XXX_ORDERMGMT_1
db.getCollection('customerOrder').aggregate( [ { $match: { 'orderDocument.accountInfo.ban': '123456' } }, { $group: { _id: { customerOrderNumber : '$orderReference.customerOrderNumber', status : '$orderReference.customerOrderStatus' }, count: { $sum: 1 } }, }] )
OUTPUT:
/* 1 */ { "_id" : { "customerOrderNumber" : "123", "status" : "COMPLETED" }, "count" : 1.0 } /* 2 */ { "_id" : { "customerOrderNumber" : "123", "status" : "INACTIVE" }, "count" : 2.0 }
DESIRED_OUTPUT:
/* 1 */ { "_id" : { "customerOrderNumber" : "123", "statusGroupings" : [ { "status" : "COMPLETED", "status_cnt" : 1.0 }, { "status" : "INACTIVE", "status_cnt" : 2.0 } ] }, "count" : 3.0 }
( My approach was to pull all customerOrders by status and count, parse it into a relational format, and filter by only customerOrderNumbers with all versions being INACTIVE. This may not be the best way and I’m open to thoughts.)

The following query can get us the expected output:
// Summarise the order versions of one account (BAN): for every
// customerOrderNumber, list each status with its version count plus the
// overall number of versions.

// Keep only the documents that belong to the account under investigation.
var onlyThisBan = {
    $match: { "orderDocument.accountInfo.ban": "123456" }
};

// One bucket per (customerOrderNumber, status) pair. The two $first
// accumulators copy the parts of the group key to top-level fields so the
// next stage can reference them directly.
var versionsPerStatus = {
    $group: {
        _id: {
            customerOrderNumber: "$orderReference.customerOrderNumber",
            status: "$orderReference.customerOrderStatus"
        },
        customerOrderNumber: { $first: "$orderReference.customerOrderNumber" },
        status: { $first: "$orderReference.customerOrderStatus" },
        count: { $sum: 1 }
    }
};

// Fold the per-status buckets into one document per customerOrderNumber.
var statusesPerOrder = {
    $group: {
        _id: "$customerOrderNumber",
        customerOrderNumber: { $first: "$customerOrderNumber" },
        statusGroupings: {
            $push: { status: "$status", status_cnt: "$count" }
        },
        count: { $sum: "$count" }
    }
};

// The group key duplicates customerOrderNumber, so hide it.
var dropGroupKey = { $project: { _id: 0 } };

db.getCollection("customerOrder")
    .aggregate([onlyThisBan, versionsPerStatus, statusesPerOrder, dropGroupKey])
    .pretty()
Data set:
{
"_id" : ObjectId("5d9bf7204ed5d873f39a773b"),
"orderDocument" : {
"accountInfo" : {
"ban" : "123456"
}
},
"orderReference" : {
"customerOrderNumber" : 1,
"customerOrderStatus" : "INACTIVE"
}
}
{
"_id" : ObjectId("5d9bf7204ed5d873f39a773c"),
"orderDocument" : {
"accountInfo" : {
"ban" : "123456"
}
},
"orderReference" : {
"customerOrderNumber" : 1,
"customerOrderStatus" : "COMPLETED"
}
}
{
"_id" : ObjectId("5d9bf7204ed5d873f39a773d"),
"orderDocument" : {
"accountInfo" : {
"ban" : "123456"
}
},
"orderReference" : {
"customerOrderNumber" : 1,
"customerOrderStatus" : "INACTIVE"
}
}
{
"_id" : ObjectId("5d9bf7204ed5d873f39a773e"),
"orderDocument" : {
"accountInfo" : {
"ban" : "123456"
}
},
"orderReference" : {
"customerOrderNumber" : 1,
"customerOrderStatus" : "INACTIVE"
}
}
Output:
{
"customerOrderNumber" : 1,
"statusGroupings" : [
{
"status" : "COMPLETED",
"status_cnt" : 1
},
{
"status" : "INACTIVE",
"status_cnt" : 3
}
],
"count" : 4
}

Related

Creating measures in a mongodb aggregation pipeline

I have a report that has been developed in PowerBI. It runs over a collection of jobs, and for a given month and year counts the number of jobs that were created, due or completed in that month using measures.
I am attempting to reproduce this report using a mongoDB aggregation pipeline. At first, I thought I could just use the $group stage to do this, but quickly realised that grouping by a specific date would exclude jobs.
Some sample documents are below (most fields excluded as they are not relevant):
{
"_id": <UUID>,
"createdOn": ISODate("2022-07-01T00:00"),
"dueOn": ISODate("2022-08-01T00:00"),
"completedOn": ISODate("2022-07-29T00:00")
},
{
"_id": <UUID>,
"createdOn": ISODate("2022-06-01T00:00"),
"dueOn": ISODate("2022-08-01T00:00"),
"completedOn": ISODate("2022-07-24T00:00")
}
For example, if I group by created date, the record for July 2022 would show 1 created job and only 1 completed job, but it should show 2.
How can I go about recreating this report? One idea was that I needed to determine the minimum and maximum of all the possible dates across those 3 date fields in my collection, but I don't know where to go from there
I ended up solving this by using a facet. I followed this process:
Each facet field grouped by a different date field from the source document, and then aggregated the relevant field (e.g. counts, or sums as required). I ensured each of these fields in the facet had a unique name.
I then added a $project stage that takes each of the facet output fields (arrays) and concatenates them into a single array
I unwound the array, and then replaced the root to make it simpler to work with
I then grouped again by the _id field which was set to the relevant date during the facet field, and then grabbed the relevant fields.
The relevant parts of the pipeline are below:
// Monthly job report: counts of jobs created / completed / due per month,
// plus revenue figures keyed on the due date.
//
// NOTE(review): the pipeline reads "$createdAt", "$completedDate", "$dueDate"
// and "$timezone", while the sample documents above show createdOn /
// completedOn / dueOn and no timezone field. The pipeline's field names are
// kept as posted; confirm them against the real collection.

// Returns the expression that truncates `dateField` to the start of its
// `unit` ("month" or "year") in the document's "$timezone", then round-trips
// the result through $dateToString / $dateFromString exactly as the original
// pipeline did. `startOfWeek` is not passed because $dateTrunc only uses it
// when the unit is "week".
function bucketStart(dateField, unit) {
    return {
        "$dateFromString": {
            "dateString": {
                "$dateToString": {
                    "date": {
                        "$dateTrunc": {
                            "date": dateField,
                            "unit": unit,
                            "binSize": 1.0,
                            "timezone": "$timezone"
                        }
                    },
                    "timezone": "$timezone"
                }
            }
        }
    };
}

// Returns the sub-pipeline for one $facet output: optional `preStages`,
// then tag every job with its month/year bucket derived from `dateField`,
// then group per bucket and apply the caller's `accumulators`.
function monthlyFacet(dateField, accumulators, preStages) {
    var groupSpec = {
        "_id": { "year": "$yearStarting", "month": "$monthStarting" },
        "monthStarting": { "$first": "$monthStarting" },
        "yearStarting": { "$first": "$yearStarting" }
    };
    Object.keys(accumulators).forEach(function (name) {
        groupSpec[name] = accumulators[name];
    });

    return (preStages || []).concat([
        {
            $addFields: {
                "monthStarting": bucketStart(dateField, "month"),
                "yearStarting": bucketStart(dateField, "year")
            }
        },
        { $group: groupSpec }
    ]);
}

db.getCollection("jobs").aggregate(
    // Pipeline
    [
        // Stage 3: one facet per date field, each with uniquely named counters
        // so the merged documents in stage 7 do not collide.
        {
            $facet: {
                // Facet 1, group by created date, count number of jobs created
                "created": monthlyFacet("$createdAt", {
                    "createdCount": { $sum: 1 }
                }),
                // Facet 2, group by completed date, count number of jobs completed.
                // NOTE(review): unlike "due", jobs with no completed date are not
                // filtered out here - confirm whether that is intended.
                "completed": monthlyFacet("$completedDate", {
                    "completedCount": { $sum: 1 }
                }),
                // Facet 3, group by due date, count number of jobs due and
                // aggregate the financial figures; jobs without a due date are skipped.
                "due": monthlyFacet(
                    "$dueDate",
                    {
                        "dueCount": { $sum: 1 },
                        "salesRevenue": { $sum: "$totalSellPrice" },
                        "costGenerated": { $sum: "$totalBuyPrice" },
                        "profit": { $sum: "$profit" },
                        "avgValue": { $avg: "$totalSellPrice" },
                        // Only jobs in a final state contribute their sell price.
                        "finalisedRevenue": {
                            $sum: {
                                $cond: {
                                    "if": { $in: ["$status", ["Finalised", "Closed"]] },
                                    "then": "$totalSellPrice",
                                    "else": 0
                                }
                            }
                        }
                    },
                    [{ $match: { "dueDate": { $ne: null } } }]
                )
            }
        },
        // Stage 4: merge the three facet arrays into one
        {
            $project: {
                "docs": { $concatArrays: ["$created", "$completed", "$due"] }
            }
        },
        // Stage 5: one document per facet result
        {
            $unwind: {
                path: "$docs"
            }
        },
        // Stage 6: promote the facet result to the top level
        {
            $replaceRoot: {
                "newRoot": "$docs"
            }
        },
        // Stage 7: recombine the (up to three) facet results that share the same
        // {year, month} key. Each counter exists in only one facet, so $sum
        // simply carries that facet's value through (missing values add 0).
        {
            $group: {
                _id: "$_id",
                "monthStarting": { "$first": "$monthStarting" },
                "yearStarting": { "$first": "$yearStarting" },
                "createdCountSum": { "$sum": "$createdCount" },
                "completedCountSum": { "$sum": "$completedCount" },
                "dueCountSum": { "$sum": "$dueCount" },
                "salesRevenue": { "$sum": "$salesRevenue" },
                "costGenerated": { "$sum": "$costGenerated" },
                "profit": { "$sum": "$profit" },
                "finalisedRevenue": { "$sum": "$finalisedRevenue" },
                "avgJobValue": { $sum: "$avgValue" }
            }
        }
    ]
);

Calculating day-of-week frequency with Mongo Aggregate

I'm trying to calculate the frequency of site visits using a Mongo Aggregate function, ie. in a given week, how many days did a user visit the site?
{ "_id" : ObjectId("5f7720caf2b93af8d566bc7c"), "email" : "blah#gmail.com", "timestamp" : ISODate("2020-09-29T17:59:00Z") }
{ "_id" : ObjectId("5f7720dcf2b93af8d566ffb7"), "email" : "blah#gmail.com", "timestamp" : ISODate("2020-09-30T01:01:00Z") }
{ "_id" : ObjectId("5f7721bbf2b93af8d56aadc4"), "email" : "yack#gmail.com", "timestamp" : ISODate("2020-10-01T09:58:00Z") }
{ "_id" : ObjectId("5f771e9ff2b93af8d55c57a9"), "email" : "yack#gmail.com", "timestamp" : ISODate("2020-09-26T04:12:00Z") }
{ "_id" : ObjectId("5f771e9ff2b93af8d55c5f6b"), "email" : "yack#gmail.com", "timestamp" : ISODate("2020-09-26T04:22:00Z") }
{ "_id" : ObjectId("5f771eeaf2b93af8d55dc45c"), "email" : "yack#gmail.com", "timestamp" : ISODate("2020-09-27T04:11:00Z") }
Output I'd like:
[
{ "_id": "blah#gmail.com", "dow": [ 2, 3 ], "visits": 2 }, // Visited Tuesday and Wednesday
{ "_id": "yack#gmail.com", "dow": [ 0, 1, 2, 3, 4, 5 ], "visits": 6 } // Visited Sunday through to Friday
]
I can get each email/dow pair as a record, but I'm not sure where to go from here...
[
{
$group: {
_id: { email: "$email", dow: { $dayOfWeek: "$timestamp" } },
}
}
]
Outputs:
{ "_id" : { "email" : "blah#gmail.com", "dow" : 2 } }
{ "_id" : { "email" : "blah#gmail.com", "dow" : 3 } }
{ "_id" : { "email" : "yack#gmail.com", "dow" : 0 } }
{ "_id" : { "email" : "yack#gmail.com", "dow" : 1 } }
{ "_id" : { "email" : "yack#gmail.com", "dow" : 2 } }
{ "_id" : { "email" : "yack#gmail.com", "dow" : 3 } }
{ "_id" : { "email" : "yack#gmail.com", "dow" : 4 } }
{ "_id" : { "email" : "yack#gmail.com", "dow" : 5 } }
Thanks!
You can use another $group statement:
db.collection.aggregate([
  // One document per distinct (email, day-of-week) pair.
  // $dayOfWeek yields 1 (Sunday) through 7 (Saturday) and is evaluated in
  // UTC unless a timezone is supplied.
  {
    $group: {
      _id: { email: "$email", dow: { $dayOfWeek: "$timestamp" } }
    }
  },
  // $group emits its buckets in no particular order; sort them so that the
  // $push below builds each user's `dow` array in ascending order.
  {
    $sort: { "_id.email": 1, "_id.dow": 1 }
  },
  // Collapse the pairs per user: the list of days visited and how many
  // distinct days that is.
  {
    $group: {
      _id: "$_id.email",
      dow: { $push: "$_id.dow" },
      visits: { $sum: 1 }
    }
  }
])
Mongo Playground

How to compare two date fields in a document, one of them being embedded in an array?

I'm trying to compare two fields that are both of $type "date"; however, when I run my query I get 0 results.
My end goal is to output all documents with s.status =6 and s.time < r.
db.inventory.insertMany( [
{ item: "canvas","r" : {"$date" : "2019-05-23T00:00:00.000+0000"},"s" :[{ "status" : 1, "time" : { "$date" : "2019-05-23T23:03:10.000+0000" } }, { "status" : 2, "time" : { "$date" : "2019-05-23T23:05:03.000+0000" } }, { "status" : 4, "time" : { "$date" : "2019-05-23T23:05:06.000+0000" } }, { "status" : 5, "time" : { "$date" : "2019-05-23T23:05:07.000+0000" } }, { "status" : 6, "time" : { "$date" : "2019-05-23T23:05:09.000+0000" } } ]},
{ item: "paper","r" : { "$date" : "2019-05-24T06:00:00.000+0000" }, "s" : [ { "status" : 1, "time" : { "$date" : "2019-05-23T23:03:10.000+0000" } }, { "status" : 2, "time" : { "$date" : "2019-05-23T23:05:03.000+0000" } }, { "status" : 3, "time" : { "$date" : "2019-05-23T23:05:06.000+0000" } }, { "status" : 4, "time" : { "$date" : "2019-05-25T05:05:07.000+0000" } }, { "status" : 6, "time" : { "$date" : "2019-05-25T05:55:09.000+0000" } } ] },
{ item: "stone","r" : { "$date" : "2019-05-23T05:00:00.000+0000" }, "s" : [ { "status" : 1, "time" : { "$date" : "2019-05-23T23:03:10.000+0000" } }, { "status" : 2, "time" : { "$date" : "2019-05-23T23:05:03.000+0000" } } ] },
{ item: "mass","r" : {"$date" : "2019-05-24T06:00:00.000+0000"},"s" :[ { "status" : 1, "time" : { "$date" : "2019-05-23T23:03:10.000+0000" } }, { "status" : 2, "time" : { "$date" : "2019-05-23T23:05:03.000+0000" } }, { "status" : 4, "time" : { "$date" : "2019-05-23T23:05:06.000+0000" } }, { "status" : 5, "time" : { "$date" : "2019-05-23T23:05:07.000+0000" } }, { "status" : 6, "time" : { "$date" : "2019-05-24T06:06:09.000+0000" } } ] },
{item : "paper","r" : {"$date" : "2019-05-24T06:00:00.000+0000" },"s" :[ { "status" : 1, "time" : { "$date" : "2019-05-23T23:03:10.000+0000" } }, { "status" : 2, "time" : { "$date" : "2019-05-23T23:05:03.000+0000" } }, { "status" : 4, "time" : { "$date" : "2019-05-23T23:05:06.000+0000" } }, { "status" : 5, "time" : { "$date" : "2019-05-23T23:05:07.000+0000" } }, { "status" : 6, "time" : { "$date" : "2019-05-24T06:06:09.000+0000" } } ] }
]);
what I've tried-
db.inventory.find({"s": {"$elemMatch" : {"status" : 6,"time" : { "$lt" : ISODate(this.r)}}});
I'm getting no error but also 0 results.
The $elemMatch query operator cannot compare an array element's field with another field of the same document.
The following query can get us the expected output:
// Return the documents that have at least one entry in `s` with status 6
// whose `time` is earlier than the document's own `r`.
// Assumes `r` and `s.time` are stored as real BSON dates - TODO confirm,
// the sample insert above wraps them in {"$date": ...} objects.
db.inventory.find({
    $expr: {
        $gt: [
            {
                $size: {
                    $filter: {
                        // $size raises an error on null/missing input, so treat
                        // a document without `s` as having an empty array.
                        "input": { $ifNull: ["$s", []] },
                        "as": "doc",
                        "cond": {
                            $and: [
                                { $eq: ["$$doc.status", 6] },
                                { $lt: ["$$doc.time", "$r"] }
                            ]
                        }
                    }
                }
            },
            // At least one matching element.
            0
        ]
    }
}).pretty()

How to write mongo query

How can I get the total number of seats available for a particular movie (i.e. the seats across all the theatres showing that movie) from the MongoDB schema below?
I need to write a mongo query to get the results
{
"_id" : ObjectId("5d637b5ce27c7d60e5c42ae7"),
"name" : "Bangalore",
"movies" : [
{
"name" : "KGF",
"theatres" : [
{
"name" : "PVR",
"seats" : 45
},
{
"name" : "IMAX",
"seats" : 46
}
]
},
{
"name" : "Avengers",
"theatres" : [
{
"name" : "IMAX",
"seats" : 50
}
]
}
],
"_class" : "com.BMS_mongo.ZZ_BMS_mongo_demo.Entity.CityInfo"
}
I have written this code :
db.cities.aggregate( [
{ "$unwind" : "$movies" }, { "$unwind" : "$theatres" } ,
{ "$group" : { _id : "$movies.theatres.seats" ,
total : { "$sum" : "$seats" } }
}
] )
My schema:
The following query can get us the expected output:
// Total number of seats per movie, summed over every theatre showing it.

// Flatten the nested arrays: first one document per movie, then one per
// (movie, theatre) pair.
var perMovie = { $unwind: "$movies" };
var perTheatre = { $unwind: "$movies.theatres" };

// Add up the seats of all theatres under the same movie name; `movie`
// repeats the group key so it survives the projection below.
var seatsPerMovie = {
    $group: {
        _id: "$movies.name",
        movie: { $first: "$movies.name" },
        totalSeats: { $sum: "$movies.theatres.seats" }
    }
};

// Hide the group key, leaving only `movie` and `totalSeats`.
var dropGroupKey = { $project: { _id: 0 } };

db.collection
    .aggregate([perMovie, perTheatre, seatsPerMovie, dropGroupKey])
    .pretty()
Data set:
{
"_id" : ObjectId("5d637b5ce27c7d60e5c42ae7"),
"name" : "Bangalore",
"movies" : [
{
"name" : "KGF",
"theatres" : [
{
"name" : "PVR",
"seats" : 45
},
{
"name" : "IMAX",
"seats" : 46
}
]
},
{
"name" : "Avengers",
"theatres" : [
{
"name" : "IMAX",
"seats" : 50
}
]
}
],
"_class" : "com.BMS_mongo.ZZ_BMS_mongo_demo.Entity.CityInfo"
}
Output:
{ "movie" : "Avengers", "totalSeats" : 50 }
{ "movie" : "KGF", "totalSeats" : 91 }
Query:
// Seats per movie: flatten movies, then each movie's theatres, and sum the
// seat counts under the movie name.
db.movie.aggregate([
    { $unwind: "$movies" },
    { $unwind: "$movies.theatres" },
    {
        $group: {
            _id: "$movies.name",
            moviename: { $first: "$movies.name" },
            totalSeats: { $sum: "$movies.theatres.seats" }
        }
    }
])
I got the answer using this query ...
// Total seats for one movie ("KGF") in one city ("Bangalore").
db.cities.aggregate( [
    { "$match" : { "name" : "Bangalore" } },
    { "$unwind" : "$movies" },
    { "$match" : { "movies.name" : "KGF" } },
    // After unwinding `movies`, the theatres live at movies.theatres.
    // Unwinding a non-existent top-level "theatres" field would drop every
    // document and leave nothing to sum.
    { "$unwind" : "$movies.theatres" },
    { "$group" : { _id : "$movies.name", total : { "$sum" : "$movies.theatres.seats" } } }
] )

How to count from two fields in mongoDB

{
"_id" : ObjectId("56bd8e9de517259412a743ab"),
"user_token" : "mzXhdbCu",
"sender_details" : {
"location" : "XYZ",
"zipcode" : "610208"
},
"shipping_address" : {
"location" : "ABC",
"zipcode" : "602578"
}
}
I have been trying to count the number of instances of each unique zipcode from both
$sender_details.zipcode
and
$shipping_address.zipcode
I tried to use the following code
db.ac_consignments.aggregate({
$group: {
_id: {
"zipcode":"$sender_details.zipcode",
"szipcode":"$shipping_address.zipcode"
},
count: {"$sum":1}
}
})
The output I receive is this
{
"result" : [
{
"_id" : {
"zipcode" : "610208",
"szipcode" : "602578"
},
"count" : 7
},
{
"_id" : {
"zipcode" : "602578",
"szipcode" : "678705"
},
"count" : 51
}
],
"ok" : 1
}
But what I require is the count of each zipcode present in $sender_details.zipcode and $shipping_address.zipcode totally. So an output like this
{
"result" : [
{
"_id" : {
"zipcode" : "610208"
},
"count" : 7
},
{
"_id" : {
"zipcode" : "602578"
},
"count" : 51
},
{
"_id" : {
"zipcode" : "678705"
},
"count" : 51
}
],
"ok" : 1
}
The following pipeline should work for you
// Count how often each zipcode occurs, regardless of whether it appears as
// the sender's or the shipping address' zipcode.

// Put both zipcodes of a consignment into one array field.
var bothZipcodes = {
    $project: {
        zipcode: ["$sender_details.zipcode", "$shipping_address.zipcode"]
    }
};

// One document per array element, i.e. two per consignment.
var oneRowPerZipcode = { $unwind: "$zipcode" };

// Tally the rows per zipcode value.
var tallyPerZipcode = {
    $group: {
        _id: "$zipcode",
        count: { $sum: 1 }
    }
};

db.getCollection('ac_consignments').aggregate([
    bothZipcodes,
    oneRowPerZipcode,
    tallyPerZipcode
])
which produces output like this
/* 1 */
{
"_id" : "610208",
"count" : 1.0
}
/* 2 */
{
"_id" : "610209",
"count" : 2.0
}
/* 3 */
{
"_id" : "602578",
"count" : 1.0
}
/* 4 */
{
"_id" : "602579",
"count" : 2.0
}
when using the following as sample data
/* 1 */
{
"_id" : ObjectId("56bd8e9de517259412a743ab"),
"user_token" : "mzXhdbCu",
"sender_details" : {
"location" : "XYZ",
"zipcode" : "610208"
},
"shipping_address" : {
"location" : "ABC",
"zipcode" : "602578"
}
}
/* 2 */
{
"_id" : ObjectId("56bd8e9de517259412a743ac"),
"user_token" : "mzXhdbCu",
"sender_details" : {
"location" : "XYZ",
"zipcode" : "610209"
},
"shipping_address" : {
"location" : "ABC",
"zipcode" : "602579"
}
}
/* 3 */
{
"_id" : ObjectId("56bd8e9de517259412a753ac"),
"user_token" : "mzXhdbCu",
"sender_details" : {
"location" : "XYZ",
"zipcode" : "610209"
},
"shipping_address" : {
"location" : "ABC",
"zipcode" : "602579"
}
}
See the following GIF
Update for older versions
// Same zipcode tally as the pipeline above, written without an array of
// field paths in $project (presumably unavailable on the older server
// versions this variant targets - confirm against the deployment).
db.getCollection('ac_consignments').aggregate([
{
// Copy both zipcodes to top-level fields and attach a constant two-element
// array naming them; $literal keeps the strings from being read as paths.
$project: {
sender_zip: "$sender_details.zipcode",
shipping_zip: "$shipping_address.zipcode",
party: { $literal: ["sender_zip", "shipping_zip"] }
}
},
{
// Duplicate every consignment: one copy tagged "sender_zip", one tagged
// "shipping_zip".
$unwind: "$party"
},
{
// Re-assemble the copies per consignment, picking the zipcode that
// matches each copy's tag, which yields a `zipcode` array per document.
$group: {
_id: "$_id",
zipcode: {
$push: {
$cond: [
{ $eq: ["$party", "sender_zip"] },
"$sender_zip",
"$shipping_zip"
]
}
}
}
},
{
// One document per collected zipcode.
$unwind: "$zipcode"
},
{
// Count the occurrences of every zipcode value.
$group: {
_id: "$zipcode",
count: { $sum: 1 }
}
}
])