I have searched extensively for how to use $group based on an array field's value, but I couldn't find a proper answer, so I'm posting here.
I have tried my best and my query is about 50% working; I need help correcting it. Can anyone help me with this?
I have a database value like
{"_id": "62b0bec8922dc767f8b933b4",
"seatSeletion": [{
"rowNo": 0,
"columnNo": 0,
"seatNo": 3
}, {
"rowNo": 0,
"columnNo": 1,
"seatNo": 4
}],
"movieId": "62af1ff6cb38656a4ffe36aa",
"movieDate": "2022-06-20T18:14:38.133+00:00",
"movieTiming": "1:30 p.m",
},
{"_id": "62b0b91560f57e0cb220db02","seatSeletion": [{
"rowNo": 0,
"columnNo": 0,
"seatNo": 1
}, {
"rowNo": 0,
"columnNo": 1,
"seatNo": 2
}],
"movieId": "62af1ff6cb38656a4ffe36aa",
"movieDate": "2022-06-20T18:14:38.133+00:00",
"movieTiming": "1:30 p.m",
}
expected output
{
"seatSeletion": [
{
"rowNo": 0,
"columnNo": 0,
"seatNo": 1
},
{
"rowNo": 0,
"columnNo": 1,
"seatNo": 2
},
{
"_id": "62b0b90e60f57e0cb220db00",
"rowNo": 0,
"columnNo": 0,
"seatNo": 3
},
{
"_id": "62b0b90e60f57e0cb220db01",
"rowNo": 0,
"columnNo": 1,
"seatNo": 4
}
],
"movieTiming": "1:30 p.m",
"movieId": "62af1ff6cb38656a4ffe36aa",
"movieDate": "2022-06-20T11:03:37.000Z"
},
this is how I tried in my query
// Asker's attempt: collect the bookings for one showing and merge their seats.
Bookings.aggregate([
{
// Keep only bookings matching the requested movie, time slot and date.
$match: {
$and: [{ movieId: ObjectId(bookingParam.movieId) },
{ movieTiming: bookingParam.movieTiming },
{ movieDate: dateQuery },
]
}
},
{
// NOTE: the whole seatSeletion array is part of the group key, so two
// bookings with different seat arrays end up in different groups. No
// accumulator is used; every value is carried inside _id.
$group: {
_id: {
seatSeletion: '$seatSeletion', movieTiming: '$movieTiming',
movieId: '$movieId', movieDate: '$movieDate', createdBy: "$createdBy", updatedBy: "$updatedBy", movies: "$movies"
}
}
},
{
// Lift the grouped values back to top-level fields and hide the compound _id.
$project: {
seatSeletion: '$_id.seatSeletion', movieTiming: '$_id.movieTiming',
movieId: '$_id.movieId', movieDate: '$_id.movieDate', movies: "$_id.movies",
_id: 0
}
}
])
but i got it like this
{
"seatSeletion": [
{
"_id": "62b0b91560f57e0cb220db03",
"rowNo": 0,
"columnNo": 0,
"seatNo": 1
},
{
"_id": "62b0b91560f57e0cb220db04",
"rowNo": 0,
"columnNo": 1,
"seatNo": 2
}
],
"movieTiming": "1:30 p.m",
"movieId": "62af1ff6cb38656a4ffe36aa",
"movieDate": "2022-06-20T11:03:37.000Z"
},
{
"seatSeletion": [
{
"_id": "62b0b90e60f57e0cb220db00",
"rowNo": 0,
"columnNo": 0,
"seatNo": 3
},
{
"_id": "62b0b90e60f57e0cb220db01",
"rowNo": 0,
"columnNo": 1,
"seatNo": 4
}
],
"movieTiming": "1:30 p.m",
"movieId": "62af1ff6cb38656a4ffe36aa",
"movieDate": "2022-06-20T11:03:37.000Z"
}
can anyone help me to fix this issue.
One option is using $reduce after the $group. It is important NOT to group by the seatSeletion as the value of this field is not common to these movies:
db.collection.aggregate([
  // 1. Select the bookings of a single showing. Listing several fields in one
  //    $match document is an implicit AND of all conditions.
  {
    $match: {
      movieId: "62af1ff6cb38656a4ffe36aa",
      movieTiming: "1:30 p.m",
      movieDate: "2022-06-20T18:14:38.133+00:00"
    }
  },
  // 2. One group per showing. seatSeletion is deliberately NOT part of the
  //    key; each booking's seat array is pushed, giving an array of arrays.
  {
    $group: {
      _id: {
        movieTiming: "$movieTiming",
        movieId: "$movieId",
        movieDate: "$movieDate"
      },
      seatSeletion: { $push: "$seatSeletion" }
    }
  },
  // 3. Flatten the array of arrays into one seat list and move the key
  //    fields back to the top level.
  {
    $project: {
      seatSeletion: {
        $reduce: {
          input: "$seatSeletion",
          initialValue: [],
          in: { $concatArrays: ["$$value", "$$this"] }
        }
      },
      movieTiming: "$_id.movieTiming",
      movieId: "$_id.movieId",
      movieDate: "$_id.movieDate",
      _id: 0
    }
  }
])
See how it works on the playground example
Another option is using $unwind instead of $reduce, but it is generally considered slower:
db.collection.aggregate([
  // 1. Select the bookings of a single showing (implicit AND of the fields).
  {
    $match: {
      movieId: "62af1ff6cb38656a4ffe36aa",
      movieTiming: "1:30 p.m",
      movieDate: "2022-06-20T18:14:38.133+00:00"
    }
  },
  // 2. Emit one document per seat so the seats can be re-collected below.
  { $unwind: "$seatSeletion" },
  // 3. Regroup per showing; every individual seat is pushed into one flat array.
  {
    $group: {
      _id: {
        movieTiming: "$movieTiming",
        movieId: "$movieId",
        movieDate: "$movieDate"
      },
      seatSeletion: { $push: "$seatSeletion" }
    }
  },
  // 4. Move the key fields back to the top level and drop the compound _id.
  {
    $project: {
      seatSeletion: 1,
      movieTiming: "$_id.movieTiming",
      movieId: "$_id.movieId",
      movieDate: "$_id.movieDate",
      _id: 0
    }
  }
])
See how it works on the playground example - unwind
This produces output close to what you expect:
{
"_id": {
"movieId": "62af1ff6cb38656a4ffe36aa",
"movieDate": "2022-06-20T18:14:38.133+00:00",
"movieTiming": "1:30 p.m"
},
"seatSeletion": [
{ "rowNo": 0,"columnNo": 0,"seatNo": 3
},
{ "rowNo": 0,"columnNo": 1,"seatNo": 4
},
{ "rowNo": 0,"columnNo": 0,"seatNo": 1
},
{ "rowNo": 0,"columnNo": 1,"seatNo": 2
}
]
}
query
// Merge the seats of all bookings that share the same movie, date and time.
// The stages are passed as ONE pipeline array, which is the documented
// signature of aggregate(pipeline, options) and the form drivers expect.
// The former empty `$match: {}` stage filtered nothing and has been dropped;
// add a real $match here to restrict the bookings considered.
db.collection.aggregate([
  // One document per selected seat.
  { $unwind: "$seatSeletion" },
  // Regroup per showing, collecting every seat into a single flat array.
  {
    $group: {
      _id: {
        movieId: "$movieId",
        movieDate: "$movieDate",
        movieTiming: "$movieTiming"
      },
      seatSeletion: { $push: "$seatSeletion" }
    }
  }
])
Related
I have following stat data stored daily for users.
{
"_id": {
"$oid": "638df4e42332386e0e06d322"
},
"appointment_count": 1,
"item_id": 2,
"item_type": "user",
"company_id": 5,
"created_date": "2022-12-05",
"customer_count": 1,
"lead_count": 1,
"door_knocks": 10
}
{
"_id": {
"$oid": "638f59a9bf33442a57c3aa99"
},
"lead_count": 2,
"item_id": 2,
"item_type": "user",
"company_id": 5,
"created_date": "2022-12-06",
"video_viewed": 2,
"door_knocks": 9
}
And I'm using the following query to get the items by rank
user_stats_2022_12.aggregate([{"$match":{"company_id":5,"created_date":{"$gte":"2022-12-04","$lte":"2022-12-06"}}},{"$setWindowFields":{"partitionBy":"$company_id","sortBy":{"door_knocks":-1},"output":{"item_rank":{"$denseRank":{}},"stat_sum":{"$sum":"$door_knocks"}}}},{"$facet":{"metadata":[{"$count":"total"}],"data":[{"$skip":0},{"$limit":100},{"$sort":{"item_rank":1}}]}}])
It gives me a rank, but with the above data the records with item_id: 2 get different ranks even though they share the same item_id. So I want to group them by item_id first and then apply the rank.
It's a little messy, but here's a playground - https://mongoplayground.net/p/JrJOo4cl9X1.
If you're going to sort by knocks after grouping, I'm assuming that you'll want the sum of door_knocks for a given item_id for this sort.
// Rank items by their TOTAL door_knocks over the date window, then return the
// original daily documents annotated with that rank, paginated.
db.collection.aggregate([
  // Restrict to one company and the requested date window. created_date is a
  // zero-padded "YYYY-MM-DD" string, so string comparison is chronological.
  {
    $match: {
      company_id: 5,
      created_date: {
        $gte: "2022-12-04",
        $lte: "2022-12-06"
      }
    }
  },
  // One group per item: keep the source documents and total their knocks.
  {
    $group: {
      _id: {
        item_id: "$item_id",
        company_id: "$company_id"
      },
      docs: { $push: "$$ROOT" },
      total_door_knocks: { $sum: "$door_knocks" }
    }
  },
  // Rank the items within each company. After $group the company id lives
  // under _id, so the partition key must be "$_id.company_id"; a bare
  // "$company_id" no longer exists at this stage and would put every document
  // into one shared partition.
  {
    $setWindowFields: {
      partitionBy: "$_id.company_id",
      sortBy: { total_door_knocks: -1 },
      output: {
        item_rank: { $denseRank: {} },
        stat_sum: { $sum: "$total_door_knocks" }
      }
    }
  },
  // Back to one document per original daily record.
  { $unwind: "$docs" },
  // Restore the original fields next to the computed rank/totals.
  // video_viewed is included so it is not silently dropped for the records
  // that carry it; fields missing on a record are simply omitted.
  {
    $project: {
      _id: "$docs._id",
      appointment_count: "$docs.appointment_count",
      company_id: "$docs.company_id",
      created_date: "$docs.created_date",
      customer_count: "$docs.customer_count",
      door_knocks: "$docs.door_knocks",
      item_id: "$docs.item_id",
      item_type: "$docs.item_type",
      lead_count: "$docs.lead_count",
      video_viewed: "$docs.video_viewed",
      item_rank: 1,
      stat_sum: 1,
      total_door_knocks: 1
    }
  },
  {
    $facet: {
      metadata: [
        { $count: "total" }
      ],
      // Sort BEFORE paginating: sorting after $skip/$limit only orders an
      // arbitrary page instead of selecting the top-ranked records. _id is a
      // tie-breaker so pages are stable between calls.
      data: [
        { $sort: { item_rank: 1, _id: 1 } },
        { $skip: 0 },
        { $limit: 100 }
      ]
    }
  }
])
I have documents with below schema
id :
currencyCode : "USD"
businessDayStartDate : ""
hourZoneNumber : 1
customerCount : 0
itemQuantity : 4
nodeId : "STORE_DEV"
endpointId : "998"
amount : 4
I am trying to find documents that match nodeId and trying to aggregate customerCount, itemQuantity and amount for each hourZoneNumber.
Below is the query
db.getCollection("xxx").aggregate([
{ "$match": { "nodeId": { "$in":["STORE_DEV_1","STORE_DEV_2"] }, "businessDayStartDate" : { "$gte": "2022-03-04" , "$lte": "2022-03-07" } }},
{ "$group": {
"_id": {
"nodeId": "$nodeId",
"endpointId": "$endpointId",
"hourZoneNumber": "$hourZoneNumber"
},
"customerCount": { "$sum": "$customerCount" },
"itemQuantity" : { "$sum": "$itemQuantity" },
"amount" : { "$sum": "$amount" }
}
},
{ "$group": {
"_id": {
"nodeId": "$_id.nodeId",
"endpointId": "$_id.endpointId"
},
"hourZones": {
"$addToSet": {
"hourZoneNumber": "$_id.hourZoneNumber",
"customerCount": { "$sum": "$customerCount" },
"itemQuantity" : { "$sum": "$itemQuantity" },
"amount" : { "$sum": "$amount" }
}
}
}
},
{ "$group": {
"_id": "$_id.nodeId",
"endpoints": {
"$addToSet": {
"endpointId": "$_id.endpointId",
"hourZones": "$hourZones"
}
},
"total": {
"$addToSet": {
"customerCount": { "$sum": "$hourZones.customerCount" },
"itemQuantity" : { "$sum": "$hourZones.itemQuantity" },
"amount" : { "$sum": "$hourZones.amount" }
}
}
}
},
{
$project: {
_id: 0,
nodeId: "$_id",
endpoints: 1,
hourZones: 1,
total: 1
}
}
])
Output is as below:
{
nodeId: 'STORE_DEV_2',
endpoints: [
{ endpointId: '998',
hourZones:
[
{ hourZoneNumber: 1,
customerCount: 0,
itemQuantity: 4,
amount: Decimal128("4") }
] } ],
total: [ { customerCount: 0, itemQuantity: 4, amount: Decimal128("4") } ],
}
{
nodeId: 'STORE_DEV_1',
endpoints:
[ { endpointId: '999',
hourZones:
[ { hourZoneNumber: 2,
customerCount: 2,
itemQuantity: 4,
amount: Decimal128("4") },
{ hourZoneNumber: 1,
customerCount: 4,
itemQuantity: 8,
amount: Decimal128("247.56") } ] } ],
total:
[ { customerCount: 6,
itemQuantity: 12,
amount: Decimal128("251.56") } ]
}
I want the output to be sorted as : First sort by nodeId, then by endpointId within the endpoints and lastly by hourZoneNumber within hourZones.
How do I do this ? I tried using sort() with all the three fields. But it did not work. Also, can someone please confirm if there is any better way than the above code, as I am new to Mongo DB.
Edit:
Please find sample input data at https://mongoplayground.net/p/FYm3QMMgrNI
Since you already have the separated data at the beginning, it is simply a matter of saving these values through the grouping and then sorting by them in the end.
Edit: In order to sort each inner array, we use $push instead of $addToSet inside the $group and $sort before each $group:
// Per node: endpoints sorted by endpointId, each with hourZones sorted by
// hourZoneNumber, plus node-level totals; nodes are returned sorted by nodeId.
//
// Arrays are built with $push, which appends in the order documents arrive,
// so each $sort is placed directly before the $group that builds the array.
// A $sort placed before the first $group would have no effect on the result
// ($sum is order-independent and $group output order is unspecified).
db.collection.aggregate([
  {
    $match: {
      nodeId: { $in: ["STORE_DEV_TTEC", "STORE_DEV_TTEZ"] },
      businessDayStartDate: { $gte: "2022-03-04", $lte: "2022-03-07" }
    }
  },
  // Totals per (node, endpoint, hour zone).
  {
    $group: {
      _id: {
        nodeId: "$nodeId",
        endpointId: "$endpointId",
        hourZoneNumber: "$hourZoneNumber"
      },
      customerCount: { $sum: "$customerCount" },
      itemQuantity: { $sum: "$itemQuantity" },
      amount: { $sum: "$amount" }
    }
  },
  // Order hour zones so they are pushed in ascending order below.
  { $sort: { "_id.hourZoneNumber": 1 } },
  // Collect hour zones per (node, endpoint) and carry endpoint-level sums
  // forward so the node total can be computed in the next $group.
  {
    $group: {
      _id: {
        nodeId: "$_id.nodeId",
        endpointId: "$_id.endpointId"
      },
      hourZones: {
        $push: {
          hourZoneNumber: "$_id.hourZoneNumber",
          customerCount: "$customerCount",
          itemQuantity: "$itemQuantity",
          amount: "$amount"
        }
      },
      customerCount: { $sum: "$customerCount" },
      itemQuantity: { $sum: "$itemQuantity" },
      amount: { $sum: "$amount" }
    }
  },
  // Order endpoints so they are pushed in ascending order below.
  { $sort: { "_id.endpointId": 1 } },
  // Collect endpoints per node and sum the node-level totals.
  {
    $group: {
      _id: "$_id.nodeId",
      endpoints: {
        $push: {
          endpointId: "$_id.endpointId",
          hourZones: "$hourZones"
        }
      },
      customerCount: { $sum: "$customerCount" },
      itemQuantity: { $sum: "$itemQuantity" },
      amount: { $sum: "$amount" }
    }
  },
  // At this point the node id is stored in _id (the nodeId field is only
  // created by the $project below), so _id is the key to sort on.
  { $sort: { _id: 1 } },
  // total is emitted as a one-element array to match the output shape shown
  // in the question.
  {
    $project: {
      _id: 0,
      nodeId: "$_id",
      endpoints: 1,
      total: [
        {
          customerCount: "$customerCount",
          itemQuantity: "$itemQuantity",
          amount: "$amount"
        }
      ]
    }
  }
])
You can see it here
$group:{
_id:{
status:"$dir_status",
directions:"$direction"
},
"X": {
"$sum": { $cond: [{ "$eq": ["$dir_status", "0"] }, 1, 0] }
},
"Y": {
"$sum": { $cond: [{ "$eq": ["$dir_status", "1"] }, 1, 0] }
},
"Z": {
"$sum": { $cond: [{ "$eq": ["$dir_status", "2"] }, 1, 0] }
},
count:{$sum:1}
}
after an aggregation i got this output
[
{
_id: { level: 'd1' },
X: 0,
Y: 0,
Z: 0,
count: 3
},
{
_id: { level: 'd2' },
X: 0,
Y: 0,
Z: 0,
count: 3
},
{
_id: { level: 'd3' },
X: 0,
Y: 0,
Z: 0,
count: 4
},
]
Now i want to group it as
[
level:"$_id.level",
range:[ { dir:"X", count:"$X"}, { dir:"Y", count:"$Y"}, { dir:"Z", count:"$Z"}]
]
I used $push, but it does not work for combining the three objects.
Please help me get output like this:
[{level:"d1", range:[{dir:"X", count:0}, {dir:"Y", count:5}]}]
I want to retrieve data from MongoDB, grouping and summing custom fields derived from a DB field that may or may not exist, but I don't get the result I expect because the data is not grouped (see the attached image). The mongo statement is:
aggregate({
$match: {
owner: 'W99999',
creation_date: {
$gte: 1530748800,
$lte: 1531292133
}
},
$project: {
isWarm: {
$cond: [{
$not: ["$referral"] }, 1, 0 ]
},
isCold: {
$cond: [{
$not: ["$referral"] }, 0, 1 ]
},
daysBefore: {
$subtract: [6, {
$trunc: {
$divide: [{
$subtract: ['$creation_date', 1530748800]
}, 86400]
}
}]
}
},
$group: {
_id: {
isWarm: { $sum: "$isWarm" },
isCold: { $sum: "$isCold" },
daysBefore: '$daysBefore'
}
})
I think the problem is the "isWarm" and "isCold" condition for creating them. Thank you in advance.
UPDATE 05/07/2018.
Schema (truncated for security reasons):
{
"_id": "1",
"creation_date":"1515780901",
"referral": //This field is optional.
{
some_data: { }
},
more_data: { }
}
Result expected:
{ [
{ isCold: 3, isWarm: 2, daysBefore: 0 },
{ isCold: 2, isWarm: 5, daysBefore: 1 },
{ isCold: 5, isWarm: 0, daysBefore: 2 },
{ isCold: 1, isWarm: 2, daysBefore: 3 },
{ isCold: 1, isWarm: 1, daysBefore: 4 },
{ isCold: 1, isWarm: 0, daysBefore: 5 },
{ isCold: 0, isWarm: 0, daysBefore: 6 }
] }
I would like to have the object even if there is no documents to count (e.g. last line of the result).
RESOLVED: I need to test with real data.
I think I have a solution:
// Pipeline: derive a day bucket and warm/cold flags per document, then
// total the flags per bucket.
[{ $project: {
_id: 0,
// daysBefore = 6 - trunc((creation_date - 1530748800) / 86400), i.e. 6 minus
// the number of whole days (86400 s) elapsed since the epoch-second value
// 1530748800. Assumes creation_date is numeric — TODO confirm (the schema
// sample shows it as a string).
daysBefore: {
$subtract: [6, {
$trunc: {
$divide: [{
$subtract: ['$creation_date', 1530748800] }, 86400]
}
}]
},
// NOTE(review): the flags are complementary: exactly one of them is 1.
// The $gte-against-null test presumably detects a present `referral`;
// a referral explicitly set to null would likely count as warm too — confirm.
isWarm: {$cond: [{ $gte: ['$referral', null]}, 1, 0]},
isCold: {$cond: [{ $gte: ['$referral', null]}, 0, 1]}} },
{
$group:
{
// The key is named creation_date but holds the daysBefore bucket.
_id: { creation_date: '$daysBefore' },
isWarm: { $sum: '$isWarm' },
isCold: { $sum: '$isCold' }
}
}]
Given this dataset:
db.calls.insert([{
"agent": 2,
"isFromOutside": true,
"duration": 304
}, {
"agent": 1,
"isFromOutside": false,
"duration": 811
}, {
"agent": 0,
"isFromOutside": true,
"duration": 753
}, {
"agent": 1,
"isFromOutside": false,
"duration": 593
}, {
"agent": 3,
"isFromOutside": true,
"duration": 263
}, {
"agent": 0,
"isFromOutside": true,
"duration": 995
}, {
"agent": 0,
"isFromOutside": false,
"duration": 210
}, {
"agent": 1,
"isFromOutside": false,
"duration": 737
}, {
"agent": 2,
"isFromOutside": false,
"duration": 170
}, {
"agent": 0,
"isFromOutside": false,
"duration": 487
}])
I have two aggregate queries that give the total duration for each agent and the count of outgoing calls for each agent:
get outGoingCalls table:
db.calls.aggregate([
{ $match: { duration :{ $gt: 0 }, isFromOutside: false } },
{ $group: { _id: "$agent", outGoingCalls: { $sum: 1 } } },
{ $sort: { outGoingCalls: -1 } }
])
get totalDuration table:
db.calls.aggregate([
{ $group: { _id: "$agent", totalDuration: { $sum: "$duration" } } },
{ $sort: {totalDuration: -1 } }
])
How to merge/join these tables (or do only one aggregation) to have something like this:
[
{_id: 0, totalDuration: ..., outGoingCalls: ...},
{_id: 1, totalDuration: ..., outGoingCalls: ...},
{_id: 2, totalDuration: ..., outGoingCalls: ...},
...
]
Try the following aggregation pipeline:
db.calls.aggregate([
  // Single pass per agent: count outgoing calls conditionally while summing
  // the duration of ALL of the agent's calls.
  {
    $group: {
      _id: "$agent",
      outGoingCalls: {
        $sum: {
          // Contributes 1 only for calls with a positive duration that did
          // not come from outside; every other call contributes 0.
          $cond: {
            if: {
              $and: [
                { $gt: ["$duration", 0] },
                { $eq: ["$isFromOutside", false] }
              ]
            },
            then: 1,
            else: 0
          }
        }
      },
      totalDuration: { $sum: "$duration" }
    }
  },
  // Longest total duration first; outgoing-call count breaks ties.
  {
    $sort: {
      totalDuration: -1,
      outGoingCalls: -1
    }
  }
])
Output:
/* 0 */
{
"result" : [
{
"_id" : 0,
"outGoingCalls" : 2,
"totalDuration" : 2445
},
{
"_id" : 1,
"outGoingCalls" : 3,
"totalDuration" : 2141
},
{
"_id" : 2,
"outGoingCalls" : 1,
"totalDuration" : 474
},
{
"_id" : 3,
"outGoingCalls" : 0,
"totalDuration" : 263
}
],
"ok" : 1
}