keep latest record based on multiple grouping - mongodb

I wrote a multi-stage pipeline to arrive at this set of documents:
{'_id': '1234',
'info': [{'type': 'patient',
'patient_id': 'p1'},
{'type': 'doc',
'doc_id': 'd1'},
{'type': 'ldlc',
'dt': datetime.datetime(2018, 10, 29, 12, 7, 23),
'val': 136},
{'type': 'bp',
'dt': datetime.datetime(2014, 8, 25, 4, 2, 27),
'val': [{'dias': 74}, {'sys': 105}]}]},
{'_id': '1235',
'info': [{'type': 'patient',
'patient_id': 'p2'},
{'type': 'doc',
'doc_id': 'd1'},
{'type': 'ldlc',
'dt': datetime.datetime(2016, 3, 31, 21, 30, 34),
'val': 153},
{'type': 'bp',
'dt': datetime.datetime(2013, 7, 3, 18, 3, 12),
'val': [{'dias': 86}, {'sys': 101}]},
{'type': 'bp',
'dt': datetime.datetime(2016, 3, 15, 18, 35, 25),
'val': [{'dias': 85}, {'sys': 108}]},
{'type': 'ldlc',
'dt': datetime.datetime(2018, 10, 1, 12, 7, 23),
'val': 144}]}
I am using pymongo, hence the datetime objects.
Now in each document I only want to keep the last recorded values (sort by dt) for 'ldlc' and 'bp'.
I would prefer it to be as:
{
"_id": '1234',
"patient_id": "p1",
"doc_id": "d1",
"sys": 105,
"dias": 74,
"ldlc": 136
},
{
"_id": '1235',
"patient_id": "p2",
"doc_id": "d1",
"sys": 108,
"dias": 85,
"ldlc": 144
}
Since the source documents are generated in an aggregation pipeline, I want to add $project and $group stages after that in order to produce the desired result.
Thanks for your help!

There are different approaches to achieve this use case.
I started with $sort to sort based on dates, and then used $facet for parallel grouping. Since you need to keep only the latest record, $last is used to get the required values.
Your aggregation can look like below:
db.collection.aggregate([
  // Emit one document per element of the "info" array.
  { $unwind: "$info" },
  // Oldest first, so that $last in the groups below yields the newest entry.
  { $sort: { "info.dt": 1 } },
  // Three sub-pipelines run over the same sorted stream of documents.
  {
    $facet: {
      // Most recent "ldlc" value for each original _id.
      ldlc: [
        { $match: { "info.type": "ldlc" } },
        { $group: { _id: "$_id", ldlc: { $last: "$info.val" } } }
      ],
      // Most recent "bp" value for each original _id. The value is an array
      // ([{dias: ...}, {sys: ...}]), so it is unwound into one document per element.
      bp: [
        { $match: { "info.type": "bp" } },
        { $group: { _id: "$_id", bp: { $last: "$info.val" } } },
        { $unwind: "$bp" }
      ],
      // Patient and doctor identifiers for each original _id.
      others: [
        { $match: { "info.type": { $in: ["patient", "doc"] } } },
        {
          $group: {
            _id: "$_id",
            ids: { $push: { p: "$info.patient_id", d: "$info.doc_id" } }
          }
        },
        { $unwind: "$ids" }
      ]
    }
  },
  // Put the three facet outputs into a single array ...
  { $project: { data: { $concatArrays: ["$others", "$ldlc", "$bp"] } } },
  // ... and turn every entry of that array back into its own document.
  { $unwind: "$data" },
  // Collect the partial results per original _id. Each pushed object only
  // carries the fields that exist on its source entry.
  {
    $group: {
      _id: "$data._id",
      val: {
        $push: {
          patient_id: "$data.ids.p",
          doc_id: "$data.ids.d",
          ldlc: "$data.ldlc",
          dias: "$data.bp.dias",
          sys: "$data.bp.sys"
        }
      }
    }
  },
  // Merge the partial objects into a single object per _id.
  {
    $project: {
      _id: 1,
      v: {
        $reduce: {
          input: "$val",
          initialValue: {},
          in: { $mergeObjects: ["$$value", "$$this"] }
        }
      }
    }
  },
  // Lift the merged fields to the top level of the result document.
  {
    $project: {
      _id: 1,
      patient_id: "$v.patient_id",
      doc_id: "$v.doc_id",
      ldlc: "$v.ldlc",
      dias: "$v.dias",
      sys: "$v.sys"
    }
  }
])
Check out the query result here: Mongo Playground
PS: This may not be the best approach

Related

Mongodb aggregation pipeline, combine result from two facet pipeline

I'm having a claim type:
type TClaim: {
insuredId: number,
treatmentInfo: { amount: number }[]
}
and a list of claims:
[
{
insuredId: 1,
treatmentInfo: [{amount: 1}, {amount: 2}]
},
{
insuredId: 1,
treatmentInfo: [{amount: 3}, {amount: 4}]
},
{
insuredId: 2,
treatmentInfo: [{amount: 1}, {amount: 2}]
}
]
I want to get the result like:
[{insuredId: 1, numberOfClaims: 2, amount: 10},{insuredId: 2, numberOfClaims: 1, amount: 3}]
I'm using the $facet operator in mongodb aggregation, one for counting numberOfClaims and one for calculating the amount of each insurer. But I can't combine it to get the result that I want.
$facet: {
totalClaims: [ { $group: { _id: '$insuredId', totalClaims: { $count: {} } } } ],
amount: [ { $unwind: { path: '$treatmentInfo'}},
{ $group:
{ _id: '$insuredId',
amount: { $sum: '$treatmentInfo.amount',
},
},
},
]
Is there a reason why you want to use $facet? - I am just curious
You just need to add a new field that sums up all the amounts in the array first, and then do a group stage by insuredId. The query is pretty much self-explanatory.
db.collection.aggregate([
  // Per-claim total: $sum over the array path adds up every treatmentInfo.amount.
  { $addFields: { totalAmount: { $sum: "$treatmentInfo.amount" } } },
  // One result per insuredId: count the claims and add up their per-claim totals.
  {
    $group: {
      _id: "$insuredId",
      numberOfClaims: { $sum: 1 },
      amount: { $sum: "$totalAmount" }
    }
  }
])
Result:
[
{
"_id": 1,
"amount": 10,
"numberOfClaims": 2
},
{
"_id": 2,
"amount": 3,
"numberOfClaims": 1
}
]
MongoDB Playground

Find missing documents in MongoDB using aggregation pipeline

I am trying to find the missing documents in MongoDB. Scenario is like this: I have a collection where the documents have a hour-stamp field. I want to find which hours are missing given the time range.
Since I am writing this question on Metabase, I am limited to a single aggregation pipeline, meaning I can't use $out to make a temporary collection and then $lookup to join against it.
I can only fill in the code of db.collection.aggregate(my code)
Any idea how can I achieve this? Thanks a lot!
Was able to achieve this so just sharing my solution.
Idea:
Generate an array for the hours needed to be checked. Notice I use
hour-diff from current hour, so I can dynamically check if (-9
hours) is missing. Reason for doing this is that I cannot find a way
to programmatically generate this array using absolute hour-stamp
(2022-07-11 10:00:00).
Calculate the hour-diff from current of the data's hour-stamp.
Use $setDifference to find the missing hours.
Calculate the absolute hour-stamp from the hour-diff value to get the missing hours.
Works for my need, and hope this will help someone.
Code snippet (I use this for finding missing hours between -6 to -30 hours for each data_source) :
db.getCollection(<collection_name>).aggregate(
[
{ $project: {
_id: 1,
data_source: 1,
available_date_time: { $toDate: "$available_date_time"},
current_hour: { $dateFromString: { dateString: { $dateToString: { format: "%Y-%m-%dT%H", date: ISODate() } }, format: "%Y-%m-%dT%H" } },
}},
{ $project: {
_id:1,
data_source:1,
available_date_time: 1,
current_hour: 1,
current_hour_minus_6hr: { $subtract: [ "$current_hour", { $multiply: [6, 60, 60, 1000] }] },
current_hour_minus_30hr: { $subtract: [ "$current_hour", { $multiply: [30, 60, 60, 1000] }] },
}},
{ $project: {
_id:1,
data_source:1,
available_date_time: 1,
current_hour: 1,
past_6hr_comp: { $subtract: [ { $toDate: "$available_date_time"}, "$current_hour_minus_6hr" ] },
past_30hr_comp: { $subtract: [ { $toDate: "$available_date_time"}, "$current_hour_minus_30hr" ] },
}},
{ $match: {
$and: [
{ past_30hr_comp: { $gte: 0 } },
{ past_6hr_comp: { $lte: 0} }
]
}},
{ $project: {
_id: 1,
data_source:1,
available_date_time: 1,
current_hour: 1,
hour_diff_from_current: { $divide: [ {$subtract: [ "$current_hour", "$available_date_time" ] }, 3600000 ] }
}},
{ $group: {
_id: { data_source: "$data_source" },
count: { $sum: 1 },
available_hour_diff_set: { $addToSet: "$hour_diff_from_current" },
current_hour: { $first: "$current_hour" }
}},
{ $project: {
_id: 0,
data_source: "$_id.data_source",
available_hour_count: "$count",
available_hour_diff_set: "$available_hour_diff_set",
required_hour_diff_set: [30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6],
current_hour: "$current_hour"
}},
{ $project: {
data_source: 1,
available_hour_count: 1,
unavailable_hour_diff_set: { $setDifference: [ "$required_hour_diff_set", "$available_hour_diff_set" ] },
current_hour:1
}},
{ $unwind: "$unavailable_hour_diff_set" },
{ $project: {
data_source: 1,
available_hour_count: 1,
current_hour: 1,
unavailable_hour_diff: "$unavailable_hour_diff_set",
}},
{ $project: {
data_source: 1,
available_hour_count: 1,
current_hour: 1,
unavailable_hour_diff: "$unavailable_hour_diff",
missing_hour: { $subtract: [ "$current_hour", { $multiply: [ "$unavailable_hour_diff", 60, 60, 1000 ] } ] }
}}
]

How to do double group and push with two fields in MongoDB?

I have data like this:
[
{
"channel": "abc",
"date": "2019-01-01",
"revenue": 100,
"quantity": 100,
},
{
"channel": "xyz",
"date": "2019-02-10",
"revenue": 100,
"quantity": 100,
},
{
"channel": "def",
"date": "2020-01-01",
"revenue": 100,
"quantity": 100,
},
{
"channel": "abc",
"date": "2021-06-01",
"revenue": 100,
"quantity": 100,
},
{
"channel": "abc",
"date": "2021-06-12",
"revenue": 100,
"quantity": 100,
}
]
I want to group by channel and push data and again group by date (in month and year only) and push data and add a field after these pushes. The dates are all Date objects, not Strings. The avg_revenue is tot_revenue divided by tot_quantity.
[
{
"channel": "abc",
"dates": [
{
"date": "2019-01",
"totals": {
"tot_revenue": 100,
"tot_quantity": 100,
"avg_revenue": 1,
}
},
{
"date": "2021-06",
"totals": {
"tot_revenue": 200,
"tot_quantity": 200,
"avg_revenue": 1,
}
}
]
},
{
"channel": "def",
"dates": [
{
"date": "2020-01",
"totals": {
"tot_revenue": 100,
"tot_quantity": 100,
"avg_revenue": 1,
}
}
]
},
{
"channel": "xyz",
"dates": [
{
"date": "2019-02",
"totals": {
"tot_revenue": 100,
"tot_quantity": 100,
"avg_revenue": 1,
}
}
]
},
]
My attempt:
db.collection.aggregate([
{
"$set": {
"date": {
"$dateFromString": {
"dateString": "$date",
"format": "%Y-%m-%d"
}
}
}
},
{
$group: {
_id: {
channel: "$channel",
month: {
$month: "$date"
},
year: {
$year: "$date"
}
},
report_dates: {
$push: {
report_date: "$date",
revenue: "$revenue",
quantity: "$quantity",
}
},
}
},
{
$group: {
_id: {
month: "$month",
year: "$year",
},
values: {
$push: {
revenue: "$revenue",
quantity: "$quantity",
}
},
}
}
])
You need to create an aggregation pipeline that consists of two $group steps, the first to group all the documents by the channel and date fields whilst accumulating the tot_revenue and tot_quantity aggregates. The other $group stage will compute the dates list with the totals.
The following pipeline should give the desired output:
db.collection.aggregate([
    // Stage 1: totals per (channel, year-month).
    { '$group': {
        '_id': {
            'channel': '$channel',
            'date': {
                '$dateToString': {
                    'format': "%Y-%m",
                    // $toDate accepts Date values as well as date strings;
                    // $dateFromString would fail when "date" is already a Date.
                    'date': { '$toDate': '$date' }
                }
            }
        },
        'tot_revenue': { '$sum': '$revenue' },
        'tot_quantity': { '$sum': '$quantity' }
    } },
    // Stage 2: one document per channel with the list of monthly totals.
    { '$group': {
        '_id': '$_id.channel',
        'dates': {
            '$push': {
                'date': '$_id.date',
                'totals': {
                    'tot_revenue': '$tot_revenue',
                    'tot_quantity': '$tot_quantity',
                    // $divide raises an error on a zero divisor, so report
                    // null when nothing was sold in that month.
                    'avg_revenue': {
                        '$cond': [
                            { '$eq': ['$tot_quantity', 0] },
                            null,
                            { '$divide': ['$tot_revenue', '$tot_quantity'] }
                        ]
                    }
                }
            }
        }
    } }
])

How can I get a single item from the array and display it as an object? and not as an array Mongodb

I have a collection from which I need specific obj e.g. notes.blok2 and notes.curse5 as an object, not as an array
{
"year":2020,
"grade":4,
"seccion":"A",
"id": 100,
"name": "pedro",
"notes":[{"curse":5,
"block":1,
"score":{ "a1": 5,"a2": 10, "a3": 15}
},{"curse":5,
"block":2,
"score":{ "b1": 10,"b2": 20, "b3": 30}
}
]
}
My query
notas.find({
"$and":[{"grade":1},{"seccion":"A"},{"year":2020}]},
{"projection":{ "grade":1, "seccion":1,"name":1,"id":1,
"notes":{"$elemMatch":{"block":2,"curse":5}},"notes.score":1} })
It works but returns notes like array
{
"_id": "55",
"id": 100,
"grade": 5,
"name": "pedro",
"seccion": "A",
"notes": [
{"score": { "b1": 10,"b2": 20, "b3": 30} }
]
}
But I NEED LIKE THIS: score at the same level as others and if doesn't exist show empty "score":{}
{
"year":2020,
"grade":5,
"seccion":"A",
"id": 100,
"name": "pedro",
"score":{ "b1": 10,"b2": 20, "b3": 30}
}
Demo - https://mongoplayground.net/p/XlJqR2DYW1X
You can use aggregation query
db.collection.aggregate([
  // Select documents that contain at least one note with block 2 and curse 5.
  {
    $match: {
      grade: 1,
      seccion: "A",
      year: 2020,
      notes: { $elemMatch: { block: 2, curse: 5 } }
    }
  },
  // One document per note.
  { $unwind: "$notes" },
  // Keep only the note that was asked for.
  { $match: { "notes.block": 2, "notes.curse": 5 } },
  // Expose that note's score at the top level next to the other fields.
  {
    $project: {
      grade: 1,
      seccion: 1,
      name: 1,
      id: 1,
      score: "$notes.score"
    }
  }
])
Update
Demo - https://mongoplayground.net/p/mq5Kue3UG42
Use $filter
db.collection.aggregate([
  // Select the documents of interest; no condition on "notes" here, so
  // documents without a matching note are still returned.
  { $match: { grade: 1, seccion: "A", year: 2020 } },
  // "score" temporarily holds the notes that satisfy both conditions.
  {
    $set: {
      score: {
        $filter: {
          input: "$notes",
          as: "note",
          cond: {
            $and: [
              { $eq: ["$$note.block", 3] },
              { $eq: ["$$note.curse", 5] }
            ]
          }
        }
      }
    }
  },
  // Replace the filtered array with the score of its first element.
  {
    $project: {
      grade: 1,
      seccion: 1,
      name: 1,
      id: 1,
      score: { $first: "$score.score" }
    }
  }
])
If you want empty object for score when match not found you can do -
Demo - https://mongoplayground.net/p/dumax58kgrc
// Single pipeline stage (fragment): collapses the filtered "score" array
// into one object, with a fallback when nothing matched.
{
$set: {
score: {
$cond: [
{ $size: "$score" }, // array length used as the condition: 0 is treated as false
{ $first: "$score" }, // true - take the first matching element of the array
{ score: {} } // false - wrapper object whose "score" is an empty object
]
}
}
},

Unable to filter by date the array field of a Collection in MongoDB

I am struggling with MongoDb in order to achieve a desirable result.
My Collection looks like:
{
_id: ...
place: 1
city: 6
user: 306
createDate: 2014-08-10 12:20:21,
lastUpdate: 2014-08-14 10:11:01,
data: [
{
customId4: 4,
entryDate: 2014-07-12 12:01:11,
exitDate: 2014-07-12 13:12:12
},
{
customId4: 4,
entryDate: 2014-07-14 00:00:01,
},
{
customId4: 5,
entryDate: 2014-07-15 11:01:11,
exitDate: 2014-07-15 11:05:15
},
{
customId4: 5,
entryDate: 2014-07-22 21:01:11,
exitDate: 2014-07-22 21:23:22
},
{
customId4: 4,
entryDate: 2014-07-23 14:00:11,
},
{
customId4: 4,
entryDate: 2014-07-29 22:00:11,
exitDate: 2014-07-29 23:00:12
},
{
customId4: 5,
entryDate: 2014-08-12 12:01:11,
exitDate: 2014-08-12 13:12:12
},
]
}
So what I would like to achieve is the array data that meets the requirements of a certain interval and that has both, entryDate and exitDate values set.
For example, if I filter by the interval "2014-07-23 00:00:00 to 2014-08-31 00:00:00" I would like the result like:
{
result: [
{
_id: {
place: 1,
user: 306
},
city: 6,
place: 1,
user: 306,
data: [
{
customMap: 4,
entryDate: 2014-07-22 21:01:11,
exitDate: 2014-07-22 21:23:22
},
{
customId4: ,
entryDate: 2014-07-29 22:00:11,
exitDate: 2014-07-29 23:00:12
},
]
}
],
ok: 1
}
My custom mongodb query looks like (from, to and placeIds are variables properly configured)
db.myColl.aggregate(
{ $match: {
'user': 1,
'data.entryDate': { $gte: from, $lte: to },
'place': { $in: placeIds },
}},
{ $unwind : "$data" },
{ $project: {
'city': 1,
'place': 1,
'user': 1,
'lastUpdate': 1,
'data.entryDate': 1,
'data.exitDate': 1,
'data.custom': 1,
fromValid: { $gte: ["$'data.entryDate'", from]},
toValid: { $lte: ["$'data.entryDate'", to]}}
},
{ $group: {
'_id': {'place': '$place', 'user': '$user'},
'city': {'$first': '$city'},
'place': {'$first': '$place'},
'user': {'$first': '$user'},
'data': { '$push': '$data'}
}}
)
But this doesn't filter the way I want because it outputs every document that meets the $match operand conditions, inside the $project operand I am unable to define the condition (I don't know if this is how it has to be done in mongoDB)
Thanks in advance!
You were on the right track, but what you might be missing with the aggregation "pipeline" is that just like the "|" pipe operator in the unix shell you "chain" the pipeline stages together just as you would chain commands.
So in fact you can have a second $match pipeline stage that does the filtering for you:
db.myColl.aggregate([
    // Pre-filter: only documents that can contain a matching array element.
    { "$match": {
        "user": 1,
        "data.entryDate": { "$gte": from, "$lte": to },
        // placeIds is the array variable from the question; $in needs an
        // array, not the string literal "placeIds".
        "place": { "$in": placeIds }
    }},
    // One document per element of the "data" array.
    { "$unwind": "$data" },
    // Second $match: now filters the individual array elements.
    { "$match": {
        "data.entryDate": { "$gte": from, "$lte": to }
    }},
    // Rebuild each original document with only the matching elements.
    { "$group": {
        "_id": "$_id",
        "place": { "$first": "$place" },
        "city": { "$first": "$city" },
        "user": { "$first": "$user" },
        "data": { "$push": "$data" }
    }}
])
Using the actual _id of the document as a grouping key presuming that you want the document back but just with a filtered array.
From MongoDB 2.6, as long as your matching array elements are unique, you could just do the same thing within $project using the $map and $setDifference operators:
db.myColl.aggregate([
    // Pre-filter: only documents that can contain a matching array element.
    { "$match": {
        "user": 1,
        "data.entryDate": { "$gte": from, "$lte": to },
        // placeIds is the array variable from the question; $in needs an
        // array, not the string literal "placeIds".
        "place": { "$in": placeIds }
    }},
    { "$project": {
        "place": 1,
        "city": 1,
        "user": 1,
        "data": {
            // Removing `false` from the mapped set leaves only the elements
            // that passed the date condition.
            "$setDifference": [
                // Map every element to itself when it lies inside the
                // interval, otherwise to `false`.
                { "$map": {
                    "input": "$data",
                    "as": "el",
                    "in": { "$cond": [
                        { "$and": [
                            { "$gte": [ "$$el.entryDate", from ] },
                            { "$lte": [ "$$el.entryDate", to ] }
                        ]},
                        "$$el",
                        false
                    ]}
                }},
                [false]
            ]
        }
    }}
])
That does the same logical thing by processing each array element and evaluating whether it meets the conditions. If so, the element content is returned; if not, false is returned. The $setDifference then filters out all the false values so that only the matching elements remain.