MongoDB avoid duplicates using $addToSet in aggregation pipeline - mongodb

there is aggregation pipeline:
db.getCollection('yourCollection').aggregate(
{
$unwind: {
path: "$dates",
includeArrayIndex: "idx"
}
},
{
$project: {
_id: 0,
dates: 1,
numbers: { $arrayElemAt: ["$numbers", "$idx"] },
goals: { $arrayElemAt: ["$goals", "$idx"] },
durations: { $arrayElemAt: ["$durations", "$idx"] }
}
}
)
which perform on the following data (sample documents):
{
"_id" : ObjectId("52d017d4b60fb046cdaf4851"),
"dates" : [
1399518702000,
1399126333000,
1399209192000,
1399027545000
],
"dress_number" : "4",
"name" : "J. Evans",
"numbers" : [
"5982",
"5983",
"5984",
"5985"
],
"goals": [
"1",
"0",
"4",
"2"
],
"durations": [
"78",
"45",
"90",
"90"
]
}
{
"_id" : ObjectId("57e250c1b60fb0213d06737c"),
"dates" : [
"1399027545000",
"1399101432000",
"1399026850000",
"1399904504000"
],
"dress_number" : "6",
"name" : K. Mitnick,
"numbers" : [
"0982",
"0981",
"0958",
"0982"
],
"durations" : [
98,
110,
66,
92
],
"goals" : [
"2",
"3",
"0",
"1"
]
}
The query works good, but there are duplicate records so I'm trying to use $addToSet operator to avoid duplicates:
db.getCollection('yourCollection').aggregate(
{
$match: {
"number": number
}
},
{
$unwind: {
path: "$dates",
includeArrayIndex: "idx"
}
},
$group: {
_id: '$_id',
dates: { $addToSet: '$dates' }
},
{
$project: {
_id: 0,
dates: 1,
numbers: { $arrayElemAt: ["$numbers", "$idx"] },
goals: { $arrayElemAt: ["$goals", "$idx"] },
durations: { $arrayElemAt: ["$durations", "$idx"] }
}
}
)
but I got only dates (other field are null)
{ dates:
[ '1399026850000',
'1399101432000',
'1399027545000',
'1399904504000',
'1399024474000',
'1399126333000' ],
numbers: null,
goals: null,
durations: null },
{ dates:
[ '1399027545000',
'1399024474000',
'1399518702000',
'1399126333000',
'1399209192000',
'1399356651000' ],
numbers: null,
goals: null,
conversation_durations: null },
{ dates:
[ '1399026850000',
'1399101432000',
'1399027545000',
'1399904504000',
'1399024474000' ],
numbers: null,
goals: null,
durations: null }
Does anybody know where is the problem?

You need to include the fields within the $group pipeline using the $first operator as follows:
db.getCollection('yourCollection').aggregate([
{ "$unwind": "$dates" },
{
"$group": {
"_id": "$_id",
"dates": { "$addToSet": "$dates" },
"numbers": { "$first": "$numbers" },
"goals": { "$first": "$goals" },
"durations": { "$first": "$durations" }
}
},
{ "$unwind": {
"path": "$dates",
"includeArrayIndex": "idx"
} },
{
"$project": {
"_id": 0,
"dates": 1,
"numbers": { "$arrayElemAt": ["$numbers", "$idx"] },
"goals": { "$arrayElemAt": ["$goals", "$idx"] },
"durations": { "$arrayElemAt": ["$durations", "$idx"] }
}
}
])
or using $setUnion to eliminate duplicates as:
db.getCollection('yourCollection').aggregate([
{
"$project": {
"_id": 0,
"dates": { "$setUnion": ["$dates", "$dates"] },
"numbers": 1,
"goals": 1,
"durations": 1
}
}
{ "$unwind": {
"path": "$dates",
"includeArrayIndex": "idx"
} },
{
"$project": {
"_id": 0,
"dates": 1,
"dateIndex": "$idx",
"numbers": { "$arrayElemAt": ["$numbers", "$idx"] },
"goals": { "$arrayElemAt": ["$goals", "$idx"] },
"durations": { "$arrayElemAt": ["$durations", "$idx"] }
}
}
])

Related

Mongodb Aggregate Filter Array Of Array Of Array

We would like to filter SKU's List which has verificationData data and differenceInStock difference greater than or Less than 0
Here is an example Data Set.
[
{
"_id": "636e0beaa13ef73324e613f0",
"status": "ACTIVE",
"inventory": 132,
"parentCategory": [
"Salt"
],
"title": "Aashirvaad MRP: 28Rs Salt 27 kg Bopp Bag (Set of 1 kg x 27)",
"createdAt": "2022-11-11T08:46:34.950Z",
"updatedAt": "2022-11-24T17:43:27.361Z",
"__v": 3,
"verificationData": [
{
"_id": "637c57ebbe783a9a138fc2d3",
"verificationDate": "2022-11-22T05:02:35.155Z",
"items": {
"listingId": "636e0beaa13ef73324e613f0",
"phyiscalVerification": [
{
"verifiedBy": "634534e72ef6462fcb681a39",
"closingStock": 178,
"phyiscalStock": 178,
"differenceInStock": 0,
"verifiedAt": "2022-11-22T10:19:38.388Z",
"_id": "637ca23abe783a9a1394f402"
}
],
"_id": "637ca23abe783a9a1394f401"
},
"yearMonthDayUTC": "2022-11-22"
},
{
"_id": "637d9b65be783a9a13998726",
"verificationDate": "2022-11-23T04:02:45.804Z",
"items": {
"listingId": "636e0beaa13ef73324e613f0",
"phyiscalVerification": [
{
"verifiedBy": "634534e72ef6462fcb681a39",
"closingStock": 161,
"phyiscalStock": 167,
"differenceInStock": 6,
"verifiedAt": "2022-11-23T09:52:36.815Z",
"_id": "637ded64be783a9a13a29d55"
}
],
"_id": "637ded64be783a9a13a29d54"
},
"yearMonthDayUTC": "2022-11-23"
},
{
"_id": "637f0254be783a9a13a94354",
"verificationDate": "2022-11-24T05:34:12.995Z",
"items": {
"listingId": "636e0beaa13ef73324e613f0",
"phyiscalVerification": [
{
"verifiedBy": "634534e72ef6462fcb681a39",
"closingStock": 144,
"phyiscalStock": 146,
"differenceInStock": 2,
"verifiedAt": "2022-11-24T12:02:28.123Z",
"_id": "637f5d54be783a9a13b1039a"
}
],
"_id": "637f5d54be783a9a13b10399"
},
"yearMonthDayUTC": "2022-11-24"
},
{
"_id": "2022-11-25",
"yearMonthDayUTC": "2022-11-25",
"items": null
}
]
},
{
"_id": "62b5c39062ddb963fc64c42d",
"status": "ACTIVE",
"inventory": 10,
"parentCategory": [
"Salt"
],
"finalMeasurementUnit": "kg",
"finalMeasure": "1 kg",
"title": "Marvella Citric Acid Lemon Salt 1 kg Pouch (Set of 500 gm x 2)",
"createdAt": "2022-06-24T14:00:49.052Z",
"updatedAt": "2022-11-21T11:04:21.643Z",
"__v": 2,
"verificationData": [
{
"_id": "2022-11-22",
"yearMonthDayUTC": "2022-11-22",
"items": null
},
{
"_id": "2022-11-23",
"yearMonthDayUTC": "2022-11-23",
"items": null
},
{
"_id": "2022-11-24",
"yearMonthDayUTC": "2022-11-24",
"items": null
},
{
"_id": "2022-11-25",
"yearMonthDayUTC": "2022-11-25",
"items": null
}
]
}
]
This could have array of 100+ SKU's
Our Aggregate Functions is as Follows
let reqData = await userListing.aggregate([
{
$match: {
warehouseId: { $eq: ObjectId(warehouseId) },
parentCategory: { $in: catList },
isWarehouseListing: { $eq: true },
isBlocked: { $ne: true },
isArchived: { $ne: true },
},
},
{ $sort: { whAddedAt: -1 } },
{
$lookup: {
from: "listingstockverifications",
let: { listId: "$_id" },
pipeline: [
{
$match: {
verificationDate: {
$gte: newFromDate,
$lt: newToDate,
},
},
},
{
$project: {
verificationDate: 1,
items: {
$filter: {
input: "$items",
cond: {
$and: [
/* {
"$$this.phyiscalVerification": {
$filter: {
input: "$$this.phyiscalVerification",
as: "psitem",
cond: { $gt: [ "$$psitem.differenceInStock", 0 ] },
},
},
}, */
{
$eq: ["$$this.listingId", "$$listId"],
},
],
},
},
},
yearMonthDayUTC: {
$dateToString: {
format: "%Y-%m-%d",
date: "$verificationDate",
},
},
},
},
{ $unwind: "$items" },
],
as: "stockVerification",
},
},
{
$addFields: {
verificationData: {
$map: {
input: dummyArray,
as: "date",
in: {
$let: {
vars: {
dateIndex: {
$indexOfArray: [
"$stockVerification.yearMonthDayUTC",
"$$date",
],
},
},
in: {
$cond: {
if: { $ne: ["$$dateIndex", -1] },
then: {
$arrayElemAt: ["$stockVerification", "$$dateIndex"],
},
else: {
_id: "$$date",
yearMonthDayUTC: "$$date",
items: null,
},
},
},
},
},
},
},
},
},
{
$project: {
stockVerification: 0,
},
},
]);
At Last now we would like to filter the SKU List the which has following Data
verificationData[].items.phyiscalVerification[].differenceInStock is Greater than or Less than 0
Expected Output in the following Exmaple would be 1st SKUs
as 2nd SKU does not have any Item Data
and even if in 3rd SKU if we got Item Data but should match the following condition
verificationData[].items.phyiscalVerification[].differenceInStock is Greater than or Less than 0
Thank you for taking your time to read and support.
You can add these two following stages to your aggregation, The idea is simple - just filter out all subdocuments that do not match the condition.
Because of the nested structure it's just not the sexiest of pipelines but it will suffice.
db.collection.aggregate([
{
$match: {
$or: [
{
"verificationData.items.phyiscalVerification.differenceInStock": {
$gt: 0
}
},
{
"verificationData.items.phyiscalVerification.differenceInStock": {
$lt: 0
}
}
]
}
},
{
$addFields: {
verificationData: {
$filter: {
input: {
$map: {
input: {
$filter: {
input: "$verificationData",
as: "verification",
cond: {
$ne: [
"$$verification.items",
null
]
}
}
},
as: "top",
in: {
$mergeObjects: [
"$$top",
{
"items": {
"$mergeObjects": [
"$$top.items",
{
phyiscalVerification: {
$filter: {
input: "$$top.items.phyiscalVerification",
as: "pshycical",
cond: {
$ne: [
"$$pshycical.differenceInStock",
0
]
}
}
}
}
]
}
}
]
}
}
},
cond: {
$gt: [
{
$size: "$$this.items.phyiscalVerification"
},
0
]
}
}
}
}
}
])
Mongo Playground

Fill day gaps of two-dimensional timeseries data in MongoDB with aggregate

I have a collection of two-dimensional timeseries data as follows:
[
{
"value" : 9,
"timestamp" : "2020-12-30T02:06:33.000+0000",
"recipeId" : 15
},
{
"value" : 2,
"timestamp" : "2020-12-30T12:04:23.000+0000",
"recipeId" : 102
},
{
"value" : 5,
"timestamp" : "2020-12-30T15:09:23.000+0000",
"recipeId" : 102
},
...
]
The records have a recipeId which is the first level of grouping I'm looking for. All values for a day of a recipe should be summed up. I want an array of timeseries per recipeId. I need the missing days to be filled with a 0. I want this construct to be created for a provided start and end date range.
Some like this for date range of 2020-12-29 to 2020-12-31:
[
[
{
"sum" : 0,
"timestamp" : "2020-12-29",
"recipeId" : 15
},
{
"sum" : 9,
"timestamp" : "2020-12-30",
"recipeId" : 15
},
{
"sum" : 0,
"timestamp" : "2020-12-31",
"recipeId" : 15
},
...
],
[
{
"sum" : 0,
"timestamp" : "2020-12-29",
"recipeId" : 0
},
{
"sum" : 7,
"timestamp" : "2020-12-30",
"recipeId" : 102
},
{
"sum" : 0,
"timestamp" : "2020-12-31",
"recipeId" : 102
},
...
]
]
This is what I currently have and it's only partially solving my requirements. I can't manage to get the last few stages right:
[
{
"$match": {
"timestamp": {
"$gte": "2020-12-29T00:00:00.000Z",
"$lte": "2020-12-31T00:00:00.000Z"
}
}
},
{
"$addFields": {
"timestamp": {
"$dateFromParts": {
"year": { "$year": "$timestamp" },
"month": { "$month": "$timestamp" },
"day": { "$dayOfMonth": "$timestamp" }
}
},
"dateRange": {
"$map": {
"input": {
"$range": [
0,
{
"$trunc": {
"$divide": [
{
"$subtract": [
"2020-12-31T00:00:00.000Z",
"2020-12-29T00:00:00.000Z"
]
},
1000
]
}
},
86400
]
},
"in": {
"$add": [
"2020-12-29T00:00:00.000Z",
{ "$multiply": ["$$this", 1000] }
]
}
}
}
}
},
{ "$unwind": "$dateRange" },
{
"$group": {
"_id": { "date": "$dateRange", "recipeId": "$recipeId" },
"count": {
"$sum": { "$cond": [{ "$eq": ["$dateRange", "$timestamp"] }, 1, 0] }
}
}
},
{
"$group": {
"_id": "$_id.date",
"total": { "$sum": "$count" },
"byRecipeId": {
"$push": {
"k": { "$toString": "$_id.recipeId" },
"v": { "$sum": "$count" }
}
}
}
},
{ "$sort": { "_id": 1 } },
{
"$project": {
"_id": 0,
"timestamp": "$_id",
"total": "$total",
"byRecipeId": {
"$arrayToObject": {
"$filter": { "input": "$byRecipeId", "cond": "$$this.v" }
}
}
}
}
]
which results in:
[
{
"timestamp": "2020-12-29T00:00:00.000Z",
"total": 21,
"byRecipeId": {}
},
{
"timestamp": "2020-12-30T00:00:00.000Z",
"total": 0,
"byRecipeId": {
"15": 9,
"102": 7
}
},
{
"timestamp": "2020-12-31T00:00:00.000Z",
"total": 0,
"byRecipeId": {}
}
]
I'm open to alternative solution of course. For examples I came across this post: https://medium.com/#alexandro.ramr777/fill-missing-values-using-mongodb-aggregation-framework-f011114e83e0 but it doesn't deal with multi-dimensions.
You could use the $redcue function. This code fills the gabs of Minutes for current day. Should be easy to adapt it to give missing Days.
{
$addFields: {
data: {
$reduce: {
input: { $range: [0, 24 * 60] },
initialValue: [],
in: {
$let: {
vars: {
ts: {
$add: [
moment().startOf('day').toDate(),
{ $multiply: ["$$this", 1000 * 60] }
]
}
},
in: {
$concatArrays: [
"$$value",
[{
$cond: {
if: { $in: ["$$ts", "$data.timestamp"] },
then: {
$first: {
$filter: {
input: "$data",
cond: { $eq: ["$$this.timestamp", "$$ts"] }
}
}
},
else: { timestamp: "$$ts", total: 0 }
}
}]
]
}
}
}
}
}
}
}
In my opinion, $reduce is more elegant than $map, however based on my experience the performance is much worse with $reduce.

How to group data by every hour

How do I get counts data grouped by every hour in 24 hours even if data is not present i.e. IF 0 will select 0
MonogDB 3.6
Input
[
{
"_id": ObjectId("5ccbb96706d1d47a4b2ced4b"),
"date": "2019-05-03T10:39:53.108Z",
"id": 166,
"update_at": "2019-05-03T02:45:36.208Z",
"type": "image"
},
{
"_id": ObjectId("5ccbb96706d1d47a4b2ced4c"),
"date": "2019-05-03T10:39:53.133Z",
"id": 166,
"update_at": "2019-05-03T02:45:36.208Z",
"type": "image"
},
{
"_id": ObjectId("5ccbb96706d1d47a4b2ced4d"),
"date": "2019-05-03T10:39:53.180Z",
"id": 166,
"update_at": "2019-05-03T20:45:36.208Z",
"type": "image"
},
{
"_id": ObjectId("5ccbb96706d1d47a4b2ced7a"),
"date": "2019-05-10T10:39:53.218Z",
"id": 166,
"update_at": "2019-12-04T10:45:36.208Z",
"type": "image"
},
{
"_id": ObjectId("5ccbb96706d1d47a4b2ced7b"),
"date": "2019-05-03T10:39:53.108Z",
"id": 166,
"update_at": "2019-05-05T10:45:36.208Z",
"type": "image"
},
{
"_id": ObjectId("5ccbb96706d1d47a4b2cedae"),
"date": "2019-05-03T10:39:53.133Z",
"id": 166,
"update_at": "2019-05-05T10:45:36.208Z",
"type": "image"
},
{
"_id": ObjectId("5ccbb96706d1d47a4b2cedad"),
"date": "2019-05-03T10:39:53.180Z",
"id": 166,
"update_at": "2019-05-06T10:45:36.208Z",
"type": "image"
},
{
"_id": ObjectId("5ccbb96706d1d47a4b2cedab"),
"date": "2019-05-10T10:39:53.218Z",
"id": 166,
"update_at": "2019-12-06T10:45:36.208Z",
"type": "image"
}
]
Implementation
db.collection.aggregate({
$match: {
update_at: {
"$gte": "2019-05-03T00:00:00.0Z",
"$lt": "2019-05-05T00:00:00.0Z"
},
id: {
"$in": [
166
]
}
}
},
{
$group: {
_id: {
$substr: [
"$update_at",
11,
2
]
},
count: {
"$sum": 1
}
},
},
{
$project: {
_id: 0,
hour: "$_id",
count: "$count"
}
},
{
$sort: {
hour: 1
}
})
Actual Output:
{
"count": 2,
"hour": "02"
},
{
"count": 1,
"hour": "20"
}
My expectation code show 24 hours event data is 0 or null and convert from example "02" as "02 AM" , "13" as "01 PM":
Expected Output
{
"count": 0,
"hour": "01" // 01 AM
},
{
"count": 2,
"hour": "02"
},
{
"count": 0,
"hour": "03"
},
{
"count": 0,
"hour": "04"
},
{
"count": 0,
"hour": "05"
},
{
"count": 1,
"hour": "20" // to 08 pm
}
Try this solution:
Explanation
We group by hour to count how many images are uploaded.
Then, we add extra field hour to create time interval (if you had v4.x, there is a better solution).
We flattern hour field (will create new documents) and split first 2 digits to match count and split last 2 digits to put AM / PM periods.
db.collection.aggregate([
{
$match: {
update_at: {
"$gte": "2019-05-03T00:00:00.0Z",
"$lt": "2019-05-05T00:00:00.0Z"
},
id: {
"$in": [
166
]
}
}
},
{
$group: {
_id: {
$substr: [
"$update_at",
11,
2
]
},
count: {
"$sum": 1
}
}
},
{
$addFields: {
hour: [
"0000",
"0101",
"0202",
"0303",
"0404",
"0505",
"0606",
"0707",
"0808",
"0909",
"1010",
"1111",
"1212",
"1301",
"1402",
"1503",
"1604",
"1705",
"1806",
"1907",
"2008",
"2109",
"2210",
"2311"
]
}
},
{
$unwind: "$hour"
},
{
$project: {
_id: 0,
hour: 1,
count: {
$cond: [
{
$eq: [
{
$substr: [
"$hour",
0,
2
]
},
"$_id"
]
},
"$count",
0
]
}
}
},
{
$group: {
_id: "$hour",
count: {
"$sum": "$count"
}
}
},
{
$sort: {
_id: 1
}
},
{
$project: {
_id: 0,
hour: {
$concat: [
{
$substr: [
"$_id",
2,
2
]
},
{
$cond: [
{
$gt: [
{
$substr: [
"$_id",
0,
2
]
},
"12"
]
},
" PM",
" AM"
]
}
]
},
count: "$count"
}
}
])
MongoPlayground
There's no "magic" solution, you'll have to hardcode it into your aggregation:
Heres an example using Mongo v3.2+ syntax with some $map and $filter magic:
db.collection.aggregate([
{
$match: {
update_at: {
"$gte": "2019-05-03T00:00:00.0Z",
"$lt": "2019-05-05T00:00:00.0Z"
},
id: {"$in": [166]}
}
},
{
$group: {
_id: {$substr: ["$update_at", 11, 2]},
count: {"$sum": 1}
}
},
{
$group: {
_id: null,
hours: {$push: {hour: "$_id", count: "$count"}}
}
},
{
$addFields: {
hours: {
$map: {
input: {
$concatArrays: [
"$hours",
{
$map: {
input: {
$filter: {
input: ["00", "01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23"],
as: "missingHour",
cond: {
$not: {
$in: [
"$$missingHour",
{
$map: {
input: "$hours",
as: "hourObj",
in: "$$hourObj.hour"
}
}
]
}
}
}
},
as: "missingHour",
in: {hour: "$$missingHour", count: 0}
}
}
]
},
as: "hourObject",
in: {
count: "$$hourObject.count",
hour: {
$cond: [
{$eq: [{$substr: ["$$hourObject.hour", 0, 1]}, "0"]},
{$concat: ["$$hourObject.hour", " AM"]},
{
$concat: [{
$switch: {
branches: [
{case: {$eq: ["$$hourObject.hour", "13"]}, then: "1"},
{case: {$eq: ["$$hourObject.hour", "14"]}, then: "2"},
{case: {$eq: ["$$hourObject.hour", "15"]}, then: "3"},
{case: {$eq: ["$$hourObject.hour", "16"]}, then: "4"},
{case: {$eq: ["$$hourObject.hour", "17"]}, then: "5"},
{case: {$eq: ["$$hourObject.hour", "18"]}, then: "6"},
{case: {$eq: ["$$hourObject.hour", "19"]}, then: "7"},
{case: {$eq: ["$$hourObject.hour", "20"]}, then: "8"},
{case: {$eq: ["$$hourObject.hour", "21"]}, then: "9"},
{case: {$eq: ["$$hourObject.hour", "22"]}, then: "10"},
{case: {$eq: ["$$hourObject.hour", "23"]}, then: "11"},
],
default: "None"
}
}, " PM"]
}
]
}
}
}
}
}
},
{
$unwind: "$hours"
},
{
$project: {
_id: 0,
hour: "$hours.hour",
count: "$hours.count"
}
},
{
$sort: {
hour: 1
}
}
]);
A short explanation of the $addFields stage: we first add hours that we're missing, we then merge the two arrays (of the original found hours and the "new" missing hours), finally we convert to the required output ("01" to "01 AM").
If you're using Mongo v4+ I recommend you change the $group _id stage to use $dateFromString as its more consistent.
_id: {$hour: {$dateFromString: {dateString: "$update_at"}}}
If you do do that, you'll have to update the $filter and $map section to use numbers and not strings and eventually using $toString to cast into the format you want, hence the v4+ requirement.
You should store date values as Date objects instead of strings. I would do the formatting like this:
db.collection.aggregate(
[
{ $match: { ... } },
{
$group: {
_id: { h: { $hour: "$update_at" } },
count: { $sum: 1 }
}
},
{
$project: {
_id: 0,
hour: {
$switch: {
branches: [
{ case: { $lt: ["$_id.h", 10] }, then: { $concat: ["0", { $toString: "$_id.h" }, " AM"] } },
{ case: { $lt: ["$_id.h", 13] }, then: { $concat: [{ $toString: "$_id.h" }, " AM"] } },
{ case: { $lt: ["$_id.h", 22] }, then: { $concat: ["0", { $toString: { $subtract: ["$_id.h", 12] } }, " PM"] } },
{ case: { $lt: ["$_id.h", 24] }, then: { $concat: [{ $toString: { $subtract: ["$_id.h", 12] } }, " PM"] } }
]
}
},
hour24: "$_id.h",
count: 1
}
},
{ $sort: { hour24: 1 } }
])
As non-American I am not familiar with AM/PM rules, esp. for midnight and midday but I guess you get the principle.
Here is the query you can test it out, for MongoDB 4.0+
i will be improving query and update
const query = [{
$match: {
update_at: {
"$gte": ISODate("2019-05-03T00:00:00.0Z"),
"$lt": ISODate("2019-05-05T00:00:00.0Z")
},
id: {
"$in": [
166
]
}
}
},
{
$group: {
_id: { $hour: "$update_at" },
count: {
"$sum": 1
}
},
},
{
$addFields: {
hourStr: { $toString: { $cond: { if: { $gte: ["$_id", 12] }, then: { $subtract: [12, { $mod: [24, '$_id'] }] }, else: "$_id" } } },
}
},
{
$project: {
formated: { $concat: ["$hourStr", { $cond: { if: { $gt: ["$_id", 12] }, then: " PM", else: " AM" } }] },
count: "$count",
hour: 1,
}
}]
If you want to output in Indian Time formate. then below code work!
const query = [
{
$match: {
update_at: {
"$gte": ISODate("2019-05-03T00:00:00.0Z"),
"$lt": ISODate("2019-05-05T00:00:00.0Z")
},
id: {
"$in": [
166
]
}
}
},
{
$project: {
"h": { "$hour": { date: "$update_at", timezone: "+0530" } },
}
},
{
$group:
{
_id: { $hour: "$h" },
count: { $sum: 1 }
}
}
];

Get objects containing max values for multiple fields using aggregation in mongodb

I want to fetch the documents having highest value for a list of specifics fields. I don't know if it's possible in only one request.
Consider below data:
_id:1, kills:12, deaths:6, assists:1
_id:2, kills:2, deaths:2, assists:22
_id:3, kills:1, deaths:2, assists:3
_id:4, kills:0, deaths:23, assists:4
_id:5, kills:6, deaths:3, assists:5
_id:6, kills:7, deaths:1, assists:6
Answer should be something like
maxKills: { _id:1, kills:12, deaths:6, assists:1 },
maxDeaths: { _id:4, kills:0, deaths:23, assists:4 },
maxAssists: { _id:2, kills:2, deaths:2, assists:22 },
I have tried several queries, but I can't get the whole objects containing the max values.
db.coll.aggregate([{
$group: {
_id: null,
kills: { $max: "$stats.kills" },
deaths: { $max: "$stats.deaths" },
assists: { $max: "$stats.assists" },
}
}])
For example here I have all the max values I want but I don't get the whole matches Objects.
---- UPDATE ----
With this answer https://stackoverflow.com/a/33361913/9188650, I've made it works but I receive data in a not really user friendly way.
{
"$group": {
"_id": null,
"maxKills": { "$max": "$stats.kills" },
"maxDeaths": { "$max": "$stats.deaths" },
"maxAssists": { "$max": "$stats.assists" },
"matches": {
"$push": {
"champion": "$champion",
"gameId": "$gameId",
"kills": "$stats.kills",
"deaths": "$stats.deaths",
"assists": "$stats.assists",
}
}
}
},
{
"$project": {
"_id": 0,
"maxKills": 1,
"maxDeaths": 1,
"maxAssists": 1,
"matches": {
"$setDifference": [
{
"$map": {
"input": "$matches",
"as": "match",
"in": {
$switch: {
branches: [
{ case: { $eq: ["$maxKills", "$$match.kills"] }, then: "$$match" },
{ case: { $eq: ["$maxDeaths", "$$match.deaths"] }, then: "$$match" },
{ case: { $eq: ["$maxAssists", "$$match.assists"] }, then: "$$match" },
],
default: false
}
}
}
},
[false]
]
}
}
}
It will returns:
{
"maxKills": 25,
"maxDeaths": 20,
"maxAssists": 39,
"matches": [
{
"champion": {
"id": 145,
"name": "Kai'Sa",
},
"gameId": 4263819967,
"kills": 25,
"deaths": 3,
"assists": 16
},
{
"champion": {
"id": 8,
"name": "Vladimir",
},
"gameId": 4262731529,
"kills": 8,
"deaths": 20,
"assists": 3
},
{
"champion": {
"id": 22,
"name": "Ashe",
},
"gameId": 4340383097,
"kills": 9,
"deaths": 7,
"assists": 39
},
{
"champion": {
"id": 23,
"name": "Tryndamere",
},
"gameId": 4352236936,
"kills": 25,
"deaths": 6,
"assists": 22
}
]
}
My last issue are cases when multiple objects have the same max value (as the example above, 2 matches have 25 kills). I only want the oldest one in these cases.
You can do it easier by using $filter and $arrayElemAt after $group stage:
db.collection.aggregate([
{
$group: {
_id: null,
maxKills: { $max: "$kills" },
maxDeaths: { $max: "$deaths" },
maxAssists: { $max: "$assists" },
docs: { $push: "$$ROOT" }
}
},
{
$project: {
_id: 0,
maxKills: { $arrayElemAt: [ { $filter: { input: "$docs", cond: { $eq: [ "$$this.kills", "$maxKills" ] } } }, 0 ] },
maxDeaths: { $arrayElemAt: [ { $filter: { input: "$docs", cond: { $eq: [ "$$this.deaths", "$maxDeaths" ] } } }, 0 ] },
maxAssists: { $arrayElemAt: [ { $filter: { input: "$docs", cond: { $eq: [ "$$this.assists", "$maxAssists" ] } } }, 0 ] }
}
}
])
Mongo Playground

Use $size with $sort in array and sub array

Here's the structure part of my collection:
_id: ObjectId("W"),
names: [
{
number: 1,
subnames: [ { id: "X", day: 1 }, { id: "Y", day: 10 }, { id: "Z", day: 2 } ],
list: ["A","B","C"],
day: 1
},
{
number: 2,
day: 5
},
{
number: 3,
subnames: [ { id: "X", day: 8 }, { id: "Z", day: 5 } ],
list: ["A","C"],
day: 2
},
...
],
...
I use this request:
db.publication.aggregate( [ { $match: { _id: ObjectId("W") } }, { $group: { _id: "$_id", SizeName: { $first: { $size: { $ifNull: [ "$names", [] ] } } }, names: { $first: "$names" } } }, { $unwind: "$names" }, { $sort: { "names.day": 1 } }, { $group: { _id: "$_id", SzNames: { $sum: 1 }, names: { $push: { number: "$names.number", subnames: "$names.subnames", list: "$names.list", SizeList: { $size: { $ifNull: [ "$names.list", [] ] } } } } } } ] );
but I would now use $sort for my names array AND my subnames array to obtain this result (subnames may not exist) :
_id: ObjectId("W"),
names: [
{
number: 2,
SizeList: 0,
day: 5
},
{
number: 3,
subnames: [ { id: "Z", day: 5 }, { id: "X", day: 8 } ],
list: ["A","C"],
SizeList: 2,
day: 2
},
{
number: 1,
subnames: [ { id: "X", day: 1 }, { id: "Z", day: 2 }, { id: "Y", day: 10 } ],
list: ["A","B","C"],
SizeList: 3,
day: 1
}
...
],
...
Can you help me ?
You can do this, but with great difficulty. I for one would gladly vote for an inline version of $sort along the lines of the $map operator. That would makes things so much easier.
For now though you need to de-construct and re-build the arrays after sorting. And you have to be very careful about this. Hence make false arrays with a single entry before processing $unwind:
db.publication.aggregate([
{ "$project": {
"SizeNames": {
"$size": {
"$ifNull": [ "$names", [] ]
}
},
"names": { "$ifNull": [{ "$map": {
"input": "$names",
"as": "el",
"in": {
"SizeList": {
"$size": {
"$ifNull": [ "$$el.list", [] ]
}
},
"SizeSubnames": {
"$size": {
"$ifNull": [ "$$el.subnames", [] ]
}
},
"number": "$$el.number",
"day": "$$el.day",
"subnames": { "$ifNull": [ "$$el.subnames", [0] ] },
"list": "$$el.list"
}
}}, [0] ] }
}},
{ "$unwind": "$names" },
{ "$unwind": "$names.subnames" },
{ "$sort": { "_id": 1, "names.subnames.day": 1 } },
{ "$group": {
"_id": {
"_id": "$_id",
"SizeNames": "$SizeNames",
"names": {
"SizeList": "$names.SizeList",
"SizeSubnames": "$names.SizeSubnames",
"number": "$names.number",
"list": "$names.list",
"day": "$names.day"
}
},
"subnames": { "$push": "$names.subnames" }
}},
{ "$sort": { "_id._id": 1, "_id.names.day": 1 } },
{ "$group": {
"_id": "$_id._id",
"SizeNames": { "$first": "$_id.SizeNames" },
"names": {
"$push": { "$cond": [
{ "$ne": [ "$_id.names.SizeSubnames", 0 ] },
{
"number": "$_id.names.number",
"subnames": "$subnames",
"list": "$_id.names.list",
"SizeList": "$_id.names.SizeList",
"day": "$_id.names.day"
},
{
"number": "$_id.names.number",
"list": "$_id.names.list",
"SizeList": "$_id.names.SizeList",
"day": "$_id.names.day"
}
]}
}
}},
{ "$project": {
"SizeNames": 1,
"names": {
"$cond": [
{ "$ne": [ "$SizeNames", 0 ] },
"$names",
[]
]
}
}}
])
You can kind of "hide away" the original empty array from the inner document as shown, but it's really difficult to remove all presence of the outer "names" array without pulling a similar conditional array "push" technique, and that really isn't a practical approach.
If all of this is just about sorting array elements in individual documents though, the aggregation framework should not be the tool to do this. It can be done as shown, but per document this is much easier to do in client side code.
Output:
{
"_id" : ObjectId("54b5cff8102f292553ce9bb5"),
"SizeNames" : 3,
"names" : [
{
"number" : 1,
"subnames" : [
{
"id" : "X",
"day" : 1
},
{
"id" : "Z",
"day" : 2
},
{
"id" : "Y",
"day" : 10
}
],
"list" : [
"A",
"B",
"C"
],
"SizeList" : 3,
"day" : 1
},
{
"number" : 3,
"subnames" : [
{
"id" : "Z",
"day" : 5
},
{
"id" : "X",
"day" : 8
}
],
"list" : [
"A",
"C"
],
"SizeList" : 2,
"day" : 2
},
{
"number" : 2,
"SizeList" : 0,
"day" : 5
}
]
}