MongoDB aggregating multiple arrays of objects based on shared key - mongodb

I'm writing a query to calculate multiple metrics for each user in my DB.
I've calculated all of the metrics, and have a structure like this
{
"metric1": [{"user_id": 1, "val": 13},{"user_id": 2, "val": 100}],
"metric2": [{"user_id": 2, "val": 29},{"user_id": 1, "val": 123}],
"metric3": [{"user_id": 1, "val": 46},{"user_id": 2, "val": 111]
}
I'm trying to convert the above into this structure
{
"user_id": [1,2],
"metric1": [13, 100],
"metric2": [29,123],
"metric3": [46,111]
}
So that I can display a table showing each user and the three metrics (one metric per column, and one user per row).

considering that your data is what you've said:
{
"metric1": [
{"id1": 1}, {"id2": 2}
],
"metric2": [
{"id2": 22}, {"id1": 11}
],
"metric3": [
{"id2": 222}, {"id1": 111}
]
}
all you've to do is using $unwind to be able to break the array and then $objectToArray to have access to keys
db.blah.aggregate([
{ $unwind: '$metric1' },
{ $unwind: '$metric2' },
{ $unwind: '$metric3' },
{ $project: {'metric1': { $objectToArray: '$metric1' }, 'metric2': { $objectToArray: '$metric2' }, 'metric3': { $objectToArray: '$metric3' }} },
{ $sort: { 'metric1.k' : -1} },
{ $sort: { 'metric2.k' : -1} },
{ $sort: { 'metric3.k' : -1} },
{ $unwind: '$metric1' },
{ $unwind: '$metric2' },
{ $unwind: '$metric3' },
{ $group: {
_id: null,
user_id: { $addToSet: '$metric1.k' },
metric1: { $addToSet: '$metric1.v' },
metric2: { $addToSet: '$metric2.v' },
metric3: { $addToSet: '$metric3.v' },
} },
{ $project: { _id: 0 } }
]).pretty()
which results
{
"user_id" : [
"id1",
"id2"
],
"metric1" : [
1,
2
],
"metric2" : [
11,
22
],
"metric3" : [
111,
222
]
}

Related

get rank in mongodb with date range

I have following stat data stored daily for users.
{
"_id": {
"$oid": "638df4e42332386e0e06d322"
},
"appointment_count": 1,
"item_id": 2,
"item_type": "user",
"company_id": 5,
"created_date": "2022-12-05",
"customer_count": 1,
"lead_count": 1,
"door_knocks": 10
}
{
"_id": {
"$oid": "638f59a9bf33442a57c3aa99"
},
"lead_count": 2,
"item_id": 2,
"item_type": "user",
"company_id": 5,
"created_date": "2022-12-06",
"video_viewed": 2,
"door_knocks": 9
}
And I'm using the following query to get the items by rank
user_stats_2022_12.aggregate([{"$match":{"company_id":5,"created_date":{"$gte":"2022-12-04","$lte":"2022-12-06"}}},{"$setWindowFields":{"partitionBy":"$company_id","sortBy":{"door_knocks":-1},"output":{"item_rank":{"$denseRank":{}},"stat_sum":{"$sum":"$door_knocks"}}}},{"$facet":{"metadata":[{"$count":"total"}],"data":[{"$skip":0},{"$limit":100},{"$sort":{"item_rank":1}}]}}])
It's giving me the rank but with the above data, the record with item_id: 2 are having different rank for same item_id. So I wanted to group them by item_id and then applied rank.
It's a little messy, but here's a playground - https://mongoplayground.net/p/JrJOo4cl9X1.
If you're going to sort by knocks after grouping, I'm assuming that you'll want the sum of door_knocks for a given item_id for this sort.
db.collection.aggregate([
{
$match: {
company_id: 5,
created_date: {
"$gte": "2022-12-04",
"$lte": "2022-12-06"
}
}
},
{
$group: {
_id: {
item_id: "$item_id",
company_id: "$company_id"
},
docs: {
$push: "$$ROOT"
},
total_door_knocks: {
$sum: "$door_knocks"
}
}
},
{
$setWindowFields: {
partitionBy: "$company_id",
sortBy: {
total_door_knocks: -1
},
output: {
item_rank: {
"$denseRank": {}
},
stat_sum: {
"$sum": "$total_door_knocks"
}
}
}
},
{
$unwind: "$docs"
},
{
$project: {
_id: "$docs._id",
appointment_count: "$docs.appointment_count",
company_id: "$docs.company_id",
created_date: "$docs.created_date",
customer_count: "$docs.customer_count",
door_knocks: "$docs.door_knocks",
item_id: "$docs.item_id",
item_type: "$docs.item_type",
lead_count: "$docs.lead_count",
item_rank: 1,
stat_sum: 1,
total_door_knocks: 1
}
},
{
$facet: {
metadata: [
{
"$count": "total"
}
],
data: [
{
"$skip": 0
},
{
"$limit": 100
},
{
"$sort": {
"item_rank": 1
}
}
]
}
}
])

Get current state from snapshot documents - mongoDB

I'm trying to get a list of current holders at specific times from a collection. My collection looks like this:
[
{
"time": 1,
"holdings": [
{ "owner": "A", "tokens": 2 },
{ "owner": "B", "tokens": 1 }
]
},
{
"time": 2,
"holdings": [
{ "owner": "B", "tokens": 2 }
]
},
{
"time": 3,
"holdings": [
{ "owner": "A", "tokens": 3 },
{ "owner": "B", "tokens": 1 },
{ "owner": "C", "tokens": 1 }
]
},
{
"time": 4,
"holdings": [
{ "owner": "C", "tokens": 0 }
]
}
]
tokens show the current holdings of an owner if the holdings have changed to the last document. I would like to change the collection so that holdings always includes the full current holdings for any point in time.
At time: 1, the holdings are: A: 2, B: 1.
At time: 2, the holdings are: A: 2, B: 2. The collections does not include A's holdings however, because they haven't changed. So what I'd like to get is:
[
{
"time": 1,
"holdings": [
{ "owner": "A", "tokens": 2 },
{ "owner": "B", "tokens": 1 }
]
},
{
"time": 2,
"holdings": [
{ "owner": "A", "tokens": 2 }, // merged from prev doc.
{ "owner": "B", "tokens": 2 }
]
},
{
"time": 3,
"holdings": [
{ "owner": "A", "tokens": 3 },
{ "owner": "B", "tokens": 1 },
{ "owner": "C", "tokens": 1 }
]
},
{
"time": 4,
"holdings": [
{ "owner": "A", "tokens": 3 }, // merged from prev
{ "owner": "B", "tokens": 1 }, // merged from prev
{ "owner": "C", "tokens": 0 }
]
}
]
From what I understand $mergeObjects does that, but I don't understand how I can merge all previous docs in order up to the current doc for each doc. So I'm looking for a way to combine setWindowFields with mergeObjects I think.
This is a nice challenge.
So far, I got this complicated solution:
Get all of our timestamps in all of our documents. This is the purpose of the first 4 steps. $setWindowFields is used to accumulate this data.
$group by owner and calculate the empty timestamps as wantedTimes- next 5 steps.
$set empty timestamps with tokens: null to be filled with actual data and $unwind to separate - next 3 steps
Use $setWindowFields to find the last known token for each owner at each timestamp.
Fill this last known state for documents with unknown token - 2 steps
$group and format answer:
db.collection.aggregate([
{
$setWindowFields: {
sortBy: {time: 1},
output: {
allTimes: {$addToSet: "$time", window: {documents: ["unbounded", "current"]}
}
}
}
},
{
$setWindowFields: {
sortBy: {time: -1},
output: {
allTimes: {$addToSet: "$allTimes", window: {documents: ["unbounded", "current"]}
}
}
}
},
{
$set: {
allTimes: {
$reduce: {
input: "$allTimes",
initialValue: [],
in: {"$concatArrays": ["$$value", "$$this"]}
}
}
}
},
{$set: {allTimes: {$setIntersection: "$allTimes"}}},
{$unwind: "$holdings"},
{$sort: {time: 1}},
{$group: { _id: "$holdings.owner",
tokens: {$push: {tokens: "$holdings.tokens", time: "$time"}},
times: {$push: "$time"}, firstTime: {$first: "$time"},
allTimes: {$first: "$allTimes"}}
},
{
$addFields: {
wantedTimes: {
$filter: {
input: "$allTimes",
as: "item",
cond: {$gte: ["$$item", "$firstTime"]}
}
}
}
},
{
$project: {
tokens: 1,
wantedTimes: {$setDifference: ["$wantedTimes", "$times"]}
}
},
{
$set: {
data: {
$map: {
input: "$wantedTimes",
as: "item",
in: {time: "$$item", tokens: null}
}
}
}
},
{$project: {tokens: {"$concatArrays": ["$tokens", "$data"]}}},
{$unwind: "$tokens"},
{
$setWindowFields: {
partitionBy: "$_id",
sortBy: {"tokens.time": 1},
output: {
lastTokens: {
$push: "$tokens.tokens",
window: {documents: ["unbounded", "current"]}
}
}
}
},
{
$set: {
lastTokens: {
$filter: {
input: "$lastTokens",
as: "item",
cond: {$ne: ["$$item", null]}
}
}
}
},
{
$set: {
"tokens.tokens": {$ifNull: ["$tokens.tokens", {$last: "$lastTokens"}]}
}
},
{
$group: {
_id: "$tokens.time",
holdings: {$push: {owner: "$_id", tokens: "$tokens.tokens" }}
}
},
{$project: {time: "$_id", holdings: 1, _id: 0}},
{$sort: {time: 1}}
])
Playground example
From a performance perspective I recommend you split it into 2 calls, the first will be a quick findOne just to get the maximum time value in the collection.
Once you have that value the pipeline can be much leaner:
const maxItem = await db.collection.findOne({}).sort({ time: -1 });
db.collection.aggregate([
{
$unwind: "$holdings"
},
{
$group: {
_id: "$holdings.owner",
times: {
$push: {
time: "$time",
tokens: "$holdings.tokens"
}
},
minTime: {
$min: "$time"
}
}
},
{
$addFields: {
times: {
$reduce: {
input: {
$range: [
"$minTime",
maxItem.time + 1 // this is max time
]
},
initialValue: {
values: [],
lastIndex: 0
},
in: {
values: {
"$concatArrays": [
"$$value.values",
[
{
$cond: [
{
$in: [
"$$this",
"$times.time"
]
},
{
"$arrayElemAt": [
"$times",
"$$value.lastIndex"
]
},
{
"$mergeObjects": [
{
tokens: 0
},
{
"$arrayElemAt": [
"$times",
{
$subtract: [
"$$value.lastIndex",
1
]
}
]
},
{
time: "$$this"
}
]
}
]
}
]
]
},
lastIndex: {
$cond: [
{
$in: [
"$$this",
"$times.time"
]
},
{
$sum: [
"$$value.lastIndex",
1
]
},
"$$value.lastIndex"
]
}
}
}
}
}
},
{
$unwind: "$times.values"
},
{
$group: {
_id: "$times.values.time",
holdings: {
$push: {
owner: "$_id",
tokens: "$times.values.tokens"
}
}
}
},
{
$project: {
_id: 0,
time: "$_id",
holdings: 1
}
},
{
$sort: {
time: 1
}
}
])
This is still quite a heavy query as it requires to $unwind and $group the entire collection, however there is no workaround this due to the requirements. if the collection is too big for this approach I recommend iteration owner by owner, or time by time and doing separate updates accordingly.
Mongo Playground
If you don't care about performance at all and want it in a single query you can still use the same pipeline, you will have to first extract the max time in the collection, this will require you to add an initial $group stage, like so:
db.collection.aggregate([
{
$group: {
_id: null,
maxTime: {
$max: "$time"
},
roots: {
$push: "$$ROOT"
}
}
},
{
$unwind: "$roots"
},
{
$replaceRoot: {
newRoot: {
"$mergeObjects": [
"$roots",
{
maxTime: "$maxTime"
}
]
}
}
},
... same pipeline ...
])

MongoDB Count With Condition within Project with $eq

I'm trying to count my "$attendance.status" with aggregation mongodb.
I've get my data with relations. then i want to count by relation columns like 'present', 'off', etc.
code
Employee.aggregate([
{
$lookup: {
from: "Attendance",
let: { employeeId: "$_id" },
pipeline: [
{
$match: {
$and: [
{ $expr: { $eq: ["$employeeId", "$$employeeId"] } },
{ isApproved: true },
{
createdAt: {
$gte: startOfMonth.toDate(),
$lte: endOfMonth.toDate(),
},
},
],
},
},
],
as: "attendance",
},
},
{
$project: {
_id: 1,
username: 1,
name: 1,
attendance: 1,
present: { $sum: { $eq: ["$attendance.status", "present"] } },
},
},
]);
But why cannot count my column?
i use $eq, with $sum then count the result. but the result is 0
{
"username": "Ethyl",
"name": "Kuhn",
"id": "614d43cde735f3e601dea165",
"attendance": [
{
"_id": "614d43cde735f3e601dea16f",
"status": "present",
"start": "2021-09-24T03:19:41.645Z",
"employeeId": "614d43cde735f3e601dea165",
"isApproved": true
},
],
"present": 0,
"sick": 0,
"off": 0,
},

How to get decimal value after avg calculation in mongodb

I have following 2 collection schema
images:{
imageId:"string", avgRating:{ rating1:decimal,rating2:decimal}, ratingCount:int}
}
ratings:{
imageId:"string", rating1:decimal, rating2:decimal
}
//here rating1 nd rating2 are ratings for different features(just according to my requirements)
so I am calculating avg as follows
db.images.aggregate([
{
$match: {
imageId: "someid",
},
},
{
$lookup:
{
from: "ratings",
let: {id: '$imageId'},
pipeline: [
{
$match: {
{
$eq: ['$imageId','$$id']
},
},
},{
$group:
{
_id: 0,
aggRating1: {$avg: "$rating1"},
aggRating2: {$avg: '$rating2'},
count: {$sum: 1}
}
},
{$project: {_id: 0,count:1,aggRating1:1,aggRating2:1}},
],
as: "rating"
}
},
{
$set: {
ratingCount: '$count',
'avgRating.rating1':'$review.aggRating1'
'avgRating.rating2':'$review.aggRating2'
}
},
]);
I am getting results like this
"data":[
{
"_id": "somedocId",
"imageId":"someid",
"ratingCount": 10,
"avgRating": {
"aggRating1": [
"rating1":{
"$numberDecimal": "3.25"
}],
"aggRating2": [
"rating2":{
"$numberDecimal": "3.25"
}]
},
"rating":[
{
"aggRating1": {
"$numberDecimal": "3.25"
},
"aggRating2": {
"$numberDecimal": "3.25"
},
"count": 10
}
],
}
]
So if u see when I set the aggRating1 and aggRating2 from rating lookup I got, it converts to array. But in rating it is an object. Idk why is that happening.
So how do i get just the decimal value of the avg results? and not like above? :/

MongoDB avoid duplicates using $addToSet in aggregation pipeline

there is aggregation pipeline:
db.getCollection('yourCollection').aggregate(
{
$unwind: {
path: "$dates",
includeArrayIndex: "idx"
}
},
{
$project: {
_id: 0,
dates: 1,
numbers: { $arrayElemAt: ["$numbers", "$idx"] },
goals: { $arrayElemAt: ["$goals", "$idx"] },
durations: { $arrayElemAt: ["$durations", "$idx"] }
}
}
)
which perform on the following data (sample documents):
{
"_id" : ObjectId("52d017d4b60fb046cdaf4851"),
"dates" : [
1399518702000,
1399126333000,
1399209192000,
1399027545000
],
"dress_number" : "4",
"name" : "J. Evans",
"numbers" : [
"5982",
"5983",
"5984",
"5985"
],
"goals": [
"1",
"0",
"4",
"2"
],
"durations": [
"78",
"45",
"90",
"90"
]
}
{
"_id" : ObjectId("57e250c1b60fb0213d06737c"),
"dates" : [
"1399027545000",
"1399101432000",
"1399026850000",
"1399904504000"
],
"dress_number" : "6",
"name" : K. Mitnick,
"numbers" : [
"0982",
"0981",
"0958",
"0982"
],
"durations" : [
98,
110,
66,
92
],
"goals" : [
"2",
"3",
"0",
"1"
]
}
The query works good, but there are duplicate records so I'm trying to use $addToSet operator to avoid duplicates:
db.getCollection('yourCollection').aggregate(
{
$match: {
"number": number
}
},
{
$unwind: {
path: "$dates",
includeArrayIndex: "idx"
}
},
$group: {
_id: '$_id',
dates: { $addToSet: '$dates' }
},
{
$project: {
_id: 0,
dates: 1,
numbers: { $arrayElemAt: ["$numbers", "$idx"] },
goals: { $arrayElemAt: ["$goals", "$idx"] },
durations: { $arrayElemAt: ["$durations", "$idx"] }
}
}
)
but I got only dates (other field are null)
{ dates:
[ '1399026850000',
'1399101432000',
'1399027545000',
'1399904504000',
'1399024474000',
'1399126333000' ],
numbers: null,
goals: null,
durations: null },
{ dates:
[ '1399027545000',
'1399024474000',
'1399518702000',
'1399126333000',
'1399209192000',
'1399356651000' ],
numbers: null,
goals: null,
conversation_durations: null },
{ dates:
[ '1399026850000',
'1399101432000',
'1399027545000',
'1399904504000',
'1399024474000' ],
numbers: null,
goals: null,
durations: null }
Does anybody know where is the problem?
You need to include the fields within the $group pipeline using the $first operator as follows:
db.getCollection('yourCollection').aggregate([
{ "$unwind": "$dates" },
{
"$group": {
"_id": "$_id",
"dates": { "$addToSet": "$dates" },
"numbers": { "$first": "$numbers" },
"goals": { "$first": "$goals" },
"durations": { "$first": "$durations" }
}
},
{ "$unwind": {
"path": "$dates",
"includeArrayIndex": "idx"
} },
{
"$project": {
"_id": 0,
"dates": 1,
"numbers": { "$arrayElemAt": ["$numbers", "$idx"] },
"goals": { "$arrayElemAt": ["$goals", "$idx"] },
"durations": { "$arrayElemAt": ["$durations", "$idx"] }
}
}
])
or using $setUnion to eliminate duplicates as:
db.getCollection('yourCollection').aggregate([
{
"$project": {
"_id": 0,
"dates": { "$setUnion": ["$dates", "$dates"] },
"numbers": 1,
"goals": 1,
"durations": 1
}
}
{ "$unwind": {
"path": "$dates",
"includeArrayIndex": "idx"
} },
{
"$project": {
"_id": 0,
"dates": 1,
"dateIndex": "$idx",
"numbers": { "$arrayElemAt": ["$numbers", "$idx"] },
"goals": { "$arrayElemAt": ["$goals", "$idx"] },
"durations": { "$arrayElemAt": ["$durations", "$idx"] }
}
}
])