Find missing documents in MongoDB using aggregation pipeline - mongodb

I am trying to find missing documents in MongoDB. The scenario is this: I have a collection whose documents have an hour-stamp field, and I want to find which hours are missing within a given time range.
Since I am writing this question in Metabase, I am limited to a single aggregation pipeline, meaning I can't use $out to create a temporary collection and then $lookup to join against it.
I can only fill in the code of db.collection.aggregate(my code)
Any idea how can I achieve this? Thanks a lot!

Was able to achieve this so just sharing my solution.
Idea:
Generate an array for the hours needed to be checked. Notice I use
hour-diff from current hour, so I can dynamically check if (-9
hours) is missing. Reason for doing this is that I cannot find a way
to programmatically generate this array using absolute hour-stamp
(2022-07-11 10:00:00).
Calculate the hour-diff from current of the data's hour-stamp.
Use $setDifference to find the missing hours.
Calculate the absolute hour-stamp from the hour-diff value to get the missing hours.
Works for my need, and hope this will help someone.
Code snippet (I use this for finding missing hours between -6 to -30 hours for each data_source) :
// Lists, per data_source, the hours between 6 and 30 hours before the current
// hour for which no document exists.
// Replace "<collection_name>" with the real collection name.
// Note: a data_source with no documents at all inside the window never reaches
// the $group stage, so it produces no rows.
db.getCollection("<collection_name>").aggregate(
  [
    // Convert the stored timestamp to a Date and compute "now" truncated to the
    // hour: formatting with "%Y-%m-%dT%H" and parsing it back drops the
    // minutes and seconds.
    { $project: {
      _id: 1,
      data_source: 1,
      available_date_time: { $toDate: "$available_date_time" },
      current_hour: { $dateFromString: { dateString: { $dateToString: { format: "%Y-%m-%dT%H", date: ISODate() } }, format: "%Y-%m-%dT%H" } }
    }},
    // Window boundaries: current hour minus 6 h and minus 30 h (in milliseconds).
    { $project: {
      _id: 1,
      data_source: 1,
      available_date_time: 1,
      current_hour: 1,
      current_hour_minus_6hr: { $subtract: [ "$current_hour", { $multiply: [6, 60, 60, 1000] } ] },
      current_hour_minus_30hr: { $subtract: [ "$current_hour", { $multiply: [30, 60, 60, 1000] } ] }
    }},
    // Signed distance (ms) of each document from both window boundaries.
    { $project: {
      _id: 1,
      data_source: 1,
      available_date_time: 1,
      current_hour: 1,
      past_6hr_comp: { $subtract: [ { $toDate: "$available_date_time" }, "$current_hour_minus_6hr" ] },
      past_30hr_comp: { $subtract: [ { $toDate: "$available_date_time" }, "$current_hour_minus_30hr" ] }
    }},
    // Keep only documents inside [current - 30 h, current - 6 h].
    { $match: {
      $and: [
        { past_30hr_comp: { $gte: 0 } },
        { past_6hr_comp: { $lte: 0 } }
      ]
    }},
    // Express each document as "hours before the current hour".
    { $project: {
      _id: 1,
      data_source: 1,
      available_date_time: 1,
      current_hour: 1,
      hour_diff_from_current: { $divide: [ { $subtract: [ "$current_hour", "$available_date_time" ] }, 3600000 ] }
    }},
    // Collect the distinct hour offsets that exist for each data_source.
    { $group: {
      _id: { data_source: "$data_source" },
      count: { $sum: 1 },
      available_hour_diff_set: { $addToSet: "$hour_diff_from_current" },
      current_hour: { $first: "$current_hour" }
    }},
    // Attach the full list of hour offsets that are expected to exist.
    { $project: {
      _id: 0,
      data_source: "$_id.data_source",
      available_hour_count: "$count",
      available_hour_diff_set: "$available_hour_diff_set",
      required_hour_diff_set: [30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6],
      current_hour: "$current_hour"
    }},
    // Expected offsets minus available offsets = missing offsets.
    { $project: {
      data_source: 1,
      available_hour_count: 1,
      unavailable_hour_diff_set: { $setDifference: [ "$required_hour_diff_set", "$available_hour_diff_set" ] },
      current_hour: 1
    }},
    // One output row per missing hour offset.
    { $unwind: "$unavailable_hour_diff_set" },
    // Turn each missing offset back into an absolute hour-stamp.
    { $project: {
      data_source: 1,
      available_hour_count: 1,
      current_hour: 1,
      unavailable_hour_diff: "$unavailable_hour_diff_set",
      missing_hour: { $subtract: [ "$current_hour", { $multiply: [ "$unavailable_hour_diff_set", 60, 60, 1000 ] } ] }
    }}
  ]
)

Related

How to convert timestamp to date in an array with Mongodb?

I need to convert the timestamp to date and update it in the same field, in Mongosh.
Here is the data:
{ _id: 1,
name: 'Annelise',
movies: [ { movieid: 12, rating: 3, timestamp: 97830291 } ] },
{ _id: 2,
name: 'Maria',
movies: [ { movieid: 3, rating: 3, timestamp: 978301398 } ]
},
{
_id: 3,
name: 'Paul',
movies: [ { movieid: 23, rating: 4, timestamp: 978302174 } ]
},
{
_id: 4,
name: 'Fred',
movies: [
{ movieid: 23, rating: 4, timestamp: 978302174 },
{ moviedid: 45, rating: 2, timestamp: 978302149 }
]
},
{ _id: 5, name: 'Annelise', timestamp: 97830291 },
{ _id: 6, name: 'Maria', timestamp: 978301398 },
{ _id: 7, name: 'Paul', timestamp: 978302174 },
{ _id: 8, name: 'Fred', timestamp: 978302149 }
I tried using an aggregation pipeline, but I have 2 problems:
This command works to update the timestamps for _id 5 to 8, but how can I save it to the same field?
db.usertest.aggregate(
[ {"$project":
{"timestamp": { $dateToString: { format: "%Y-%m-%d", date: { "$toDate": {$toLong:"$timestamp"}}}}
} } ])
How can I update the timestamp inside the movies array for _id 1 to 4?
I tried the following but this does not work:
db.usertest.aggregate( [ {"$project": {"timestamp": { $dateToString: { format: "%Y-%m-%d", date: { "$toDate": {$toLong:"$timestamp"}}}}}} ])
Thanks for your help!
You can use an aggregation pipeline in an update. MongoDB's $toDate interprets a numeric value as milliseconds since the Unix epoch, whereas your raw timestamps are in seconds, so multiply them by 1000 before converting them to dates.
For _id 1-4, use $map to perform element-wise transformation.
// Converts the epoch-seconds `timestamp` of every element of `movies`
// into a Date for the documents with _id 1-4.
db.collection.update(
  { _id: { $in: [1, 2, 3, 4] } },
  [
    {
      $addFields: {
        movies: {
          $map: {
            input: "$movies",
            as: "m",
            in: {
              // Merge the converted timestamp over the original element so that
              // every other key is kept as is, instead of rebuilding the element
              // from a fixed list of keys (which would drop anything unlisted,
              // e.g. the `moviedid` key in the sample data).
              $mergeObjects: [
                "$$m",
                {
                  timestamp: {
                    // seconds -> milliseconds before converting to a Date
                    $toDate: { $multiply: [ { $toLong: "$$m.timestamp" }, 1000 ] }
                  }
                }
              ]
            }
          }
        }
      }
    }
  ],
  { multi: true }
)
Mongo playground
For _id: 5-8, simply update the field.
// Overwrites the top-level `timestamp` (epoch seconds) of documents 5-8
// with the corresponding Date.
db.collection.update(
  { _id: { $in: [5, 6, 7, 8] } },
  [
    {
      // $set is the alias of $addFields
      $set: {
        timestamp: {
          // seconds -> milliseconds before converting to a Date
          $toDate: { $multiply: [ { $toLong: "$timestamp" }, 1000 ] }
        }
      }
    }
  ],
  { multi: true }
)
Mongo playground

Select specific fields from a specific element of array by $elemMatch

I want to select some specific fields from a particular object in a nested array, achieved through mongoose/mongo.
Playground Link
Consider the data:
[
{
"_id": ObjectId("5ff4b728b6af610f0851d2a6"),
"totalScore": 500,
"totalCompleted": 100,
"monthly": [
{
year: 2021,
month: 8,
attempted: 10,
completed: 5,
score: 20,
}
],
},
]
I want to first get all the documents, and then inside the "monthly", I want to select only the ones which match month = 8, and return only the "score" field and ignore rest of the fields like "attempted", "completed", etc.
I have tried the following query so far:
db.collection.find({},
{
totalScore: 1,
"monthly": {
$elemMatch: {
year: 2021,
month: 8,
},
},
})
It returns all the keys of the entire "monthly" object. Like so:
[
{
"_id": ObjectId("5ff4b728b6af610f0851d2a6"),
"monthly": [
{
"attempted": 10,
"completed": 5,
"month": 8,
"score": 20,
"year": 2021
}
],
"totalScore": 500
},
]
But, what I want, is to only select the "score" field from the "monthly".
So the result data would be:
[
{
"_id": ObjectId("5ff4b728b6af610f0851d2a6"),
"monthly": [
{
"score": 20,
}
],
"totalScore": 500
},
How should I approach this problem?
This can be done with a simple aggregation using $map and $filter:
// Keeps only the `monthly` entries for month 8 and reduces each of them
// to its `score` field.
db.collection.aggregate([
  {
    $project: {
      totalScore: 1,
      monthly: {
        $map: {
          // inner step: select the entries whose month is 8
          input: {
            $filter: {
              input: "$monthly",
              as: "entry",
              cond: { $eq: ["$$entry.month", 8] }
            }
          },
          // outer step: project each selected entry down to `score`
          as: "match",
          in: { score: "$$match.score" }
        }
      }
    }
  }
])
Example on mongoplayground: https://mongoplayground.net/p/5PbR49Ufxb5

keep latest record based on multiple grouping

I wrote a multi-stage pipeline to arrive at this set of documents:
{'_id': '1234',
'info': [{'type': 'patient',
'patient_id': 'p1'},
{'type': 'doc',
'doc_id': 'd1'},
{'type': 'ldlc',
'dt': datetime.datetime(2018, 10, 29, 12, 7, 23),
'val': 136},
{'type': 'bp',
'dt': datetime.datetime(2014, 8, 25, 4, 2, 27),
'val': [{'dias': 74}, {'sys': 105}]}]},
{'_id': '1235',
'info': [{'type': 'patient',
'patient_id': 'p2'},
{'type': 'doc',
'doc_id': 'd1'},
{'type': 'ldlc',
'dt': datetime.datetime(2016, 3, 31, 21, 30, 34),
'val': 153},
{'type': 'bp',
'dt': datetime.datetime(2013, 7, 3, 18, 3, 12),
'val': [{'dias': 86}, {'sys': 101}]},
{'type': 'bp',
'dt': datetime.datetime(2016, 3, 15, 18, 35, 25),
'val': [{'dias': 85}, {'sys': 108}]},
{'type': 'ldlc',
'dt': datetime.datetime(2018, 10, 1, 12, 7, 23),
'val': 144}]}
I am using pymongo, hence the datetime objects.
Now in each document I only want to keep the last recorded values (sort by dt) for 'ldlc' and 'bp'.
I would prefer it to be as:
{
"_id": '1234',
"patient_id": "p1",
"doc_id": "d1",
"sys": 105,
"dias": 74,
"ldlc": 136
},
{
"_id": '1235',
"patient_id": "p2",
"doc_id": "d1",
"sys": 108,
"dias": 85,
"ldlc": 144
}
Since the source documents are generated in an aggregation pipeline, I want to add $project and $group stages after it in order to produce the desired result.
Thanks for your help!
There are different approaches to achieve this use case.
I started with $sort to sort by date, and then used $facet for parallel grouping. Since you need to keep only the latest record, $last is used to get the required values.
Your aggregation can look like below:
// Flattens each document's `info` array into one record holding the
// patient/doc ids plus the most recent `ldlc` and `bp` readings.
db.collection.aggregate([
  // One document per `info` entry, ordered by date ascending so that
  // $last in the groups below picks the newest reading.
  { $unwind: "$info" },
  { $sort: { "info.dt": 1 } },
  {
    $facet: {
      // newest ldlc value per original document
      ldlc: [
        { $match: { "info.type": "ldlc" } },
        { $group: { _id: "$_id", ldlc: { $last: "$info.val" } } }
      ],
      // newest bp value per original document; the value is an array, so it is
      // unwound into one document per array element
      bp: [
        { $match: { "info.type": "bp" } },
        { $group: { _id: "$_id", bp: { $last: "$info.val" } } },
        { $unwind: "$bp" }
      ],
      // patient / doc identifiers per original document
      others: [
        { $match: { "info.type": { $in: ["patient", "doc"] } } },
        {
          $group: {
            _id: "$_id",
            ids: { $push: { p: "$info.patient_id", d: "$info.doc_id" } }
          }
        },
        { $unwind: "$ids" }
      ]
    }
  },
  // Put the three facet outputs back into a single stream of documents.
  { $project: { data: { $concatArrays: ["$others", "$ldlc", "$bp"] } } },
  { $unwind: "$data" },
  // Gather every partial record that belongs to the same original _id.
  {
    $group: {
      _id: "$data._id",
      val: {
        $push: {
          patient_id: "$data.ids.p",
          doc_id: "$data.ids.d",
          ldlc: "$data.ldlc",
          dias: "$data.bp.dias",
          sys: "$data.bp.sys"
        }
      }
    }
  },
  // Fold the partial records into a single object.
  {
    $project: {
      _id: 1,
      v: {
        $reduce: {
          input: "$val",
          initialValue: {},
          in: { $mergeObjects: ["$$value", "$$this"] }
        }
      }
    }
  },
  // Lift the merged fields to the top level.
  {
    $project: {
      _id: 1,
      patient_id: "$v.patient_id",
      doc_id: "$v.doc_id",
      ldlc: "$v.ldlc",
      dias: "$v.dias",
      sys: "$v.sys"
    }
  }
])
Check out the query result here: Mongo Playground
PS: This may not be the best approach

Trying to use $cond to $sum and $subtract

My documents:
_id:"DwNMQtHYopXKK3rXt"
client_id:"ZrqKavXX8ieGpx5ae"
client_name:"luana"
companyId:"z3ern2Q7rdvviYCGv"
is_active:true
client_searchable_name:"luana"
status:"paid"
items:Object
id:912602
gross_amount:1000
type:"service"
description:"Pedicure com Zé (pacote)"
item_id:"bjmmPPjqKdWfwJqtC"
user_id:"gWjCskpHF2a3xHYy9"
user_id_commission:50
user_id_amount:0
use_package:true
quantity:1
item_costs:Array
discount_cost:Object
type:"package"
value:100
charge_from:"company_only"
entity_id:"LLRirWu5DabkRna7X"
created_at:2019-10-29T10:35:39.493+00:00
updated_at:2019-10-29T10:36:42.983+00:00
version:"2"
created_by:"2QBRDN9MACQagSkJr"
amount:0
multiple_payment_methods:Array
closed_at:2019-10-29T10:36:52.781+00:00
So i made a $project:
{
_id: 0,
closed_at: 1,
serviceId: "$items.item_id",
serviceAmount: "$items.gross_amount",
discounts:"$items.discount_cost"
}
And then $group
_id: {
month: { $month: "$closed_at" },
serviceId: "$serviceId",
discountType: "$discounts.type",
discountValue: "$discounts.value"
},
totalServiceAmount: {
$sum: "$serviceAmount"
}
}
I'm trying to make a $sum of values of the categories in my DB, actually i filtered all the data, so i have exactly what i need, like that;
_id:Object
"month":10
"serviceId":"MWBqhMyW8ataGxjBT"
"discountType":"courtesy"
"discountValue":100
"totalServiceAmount":5000
So, I have 5 types of discounts in my DB: percentage (a discount in percent), courtesy (makes the service amount 0), package (makes the service amount 0), gross (a fixed discount amount), and null if there's no discount or value.
So, if the type of discount is:
Percentage: I need to subtract the discountValue from the totalServiceAmount (discountValue is a percentage, so how do I subtract it when totalServiceAmount is a gross value?)
Courtesy and package: I need to set the totalServiceAmount to 0.
Gross: I need to subtract the discountValue from the totalServiceAmount.
Null: just leave totalServiceAmount unchanged.
I tried the following as a test, but I really don't know if I'm going down the right path; the result was null for every amountWithDiscount.
{
$project: {
{
amountWithDiscount: {
$cond: {
if: {
$eq: ["$_id.discountType", "null"]
},
then: "$serviceAmount", else: {
$cond: {
if: {
$eq: ["$_id.discountType", "gross"]
},
then: {
$subtract: ["$serviceAmount", "$_id.discountValue"]
},
else: "$serviceAmount"
}
}
}
}
}
Make sense?
I created a collection from your grouping result:
01) Example of Documents:
[
{
"_id": "5db9ca609a17899b8ba6650d",
"month": 10,
"serviceId": "MWBqhMyW8ataGxjBT",
"discountType": "courtesy",
"discountValue": 0,
"totalServiceAmount": 5000
},
{
"_id": "5db9d0859a17899b8ba66856",
"month": 10,
"serviceId": "MWBqhMyW8ataGxjBT",
"discountType": "gross",
"discountValue": 100,
"totalServiceAmount": 5000
},
{
"_id": "5db9d0ac9a17899b8ba66863",
"month": 10,
"serviceId": "MWBqhMyW8ataGxjBT",
"discountType": "percentage",
"discountValue": 10,
"totalServiceAmount": 5000
},
{
"_id": "5db9d0d89a17899b8ba6687f",
"month": 10,
"serviceId": "MWBqhMyW8ataGxjBT",
"discountType": null,
"discountValue": 10,
"totalServiceAmount": 6000
}
]
02) Query:
// Computes the service amount after applying the discount, by discount type:
//   courtesy / package -> 0
//   gross              -> totalServiceAmount - discountValue
//   percentage         -> totalServiceAmount * (1 - discountValue / 100)
//   null or anything else -> totalServiceAmount unchanged
db.collection.aggregate([
  {
    $project: {
      discountType: "$discountType",
      amountWithDiscount: {
        $switch: {
          branches: [
            {
              // courtesy and package make the service free
              case: { $in: ["$discountType", ["courtesy", "package"]] },
              then: 0
            },
            {
              // fixed amount off
              case: { $eq: ["$discountType", "gross"] },
              then: { $subtract: ["$totalServiceAmount", "$discountValue"] }
            },
            {
              // percentage off: multiply by the remaining fraction
              case: { $eq: ["$discountType", "percentage"] },
              then: {
                $multiply: [
                  "$totalServiceAmount",
                  { $subtract: [1, { $divide: ["$discountValue", 100] }] }
                ]
              }
            }
          ],
          // no discount (null) or an unknown type
          default: "$totalServiceAmount"
        }
      }
    }
  }
])
A working example at https://mongoplayground.net/p/nU7vhGN-uSp.
I don't know if I fully understand your problem, but
take a look and see if it solves your problem.

Multiple aggregates in the same query

I have a problem that I don't know how to solve with MongoDB syntax.
In fact, this is my actual query :
db.traitement".aggregate {$match: {timestampentre: {$gt: start}, timestampentre: {$lt: end}}}, {$project: {year: {$year: "$date_entre"}, month: {$month: "$date_entre"}, "carnetsanitairedone.isDoneDouche": "$carnetsanitairedone.isDoneDouche", "carnetsanitairedone.isDoneDetartrage": "$carnetsanitairedone.isDoneDetartrage"}}, {$group: {_id: {year: "$year", month: "$month", "carnetsanitairedone.isDoneDouche": "$carnetsanitairedone.isDoneDouche", "carnetsanitairedone.isDoneDetartrage": "$carnetsanitairedone.isDoneDetartrage"}, count: {$sum: 1}}}
that returns me this resultset :
[ { _id:
{ year: 2014,
month: 10,
'carnetsanitairedone.isDoneDouche': false,
'carnetsanitairedone.isDoneDetartrage': false },
count: 1 },
{ _id:
{ year: 2014,
month: 10,
'carnetsanitairedone.isDoneDouche': true,
'carnetsanitairedone.isDoneDetartrage': true },
count: 1 },
{ _id:
{ year: 1970,
month: 1,
'carnetsanitairedone.isDoneDouche': false,
'carnetsanitairedone.isDoneDetartrage': false },
count: 1 },
{ _id:
{ year: 1970,
month: 1,
'carnetsanitairedone.isDoneDouche': true,
'carnetsanitairedone.isDoneDetartrage': true },
count: 2 } ]
What I really need corresponds to the following resultset :
'year': 2014,
'month': 10,
'count.isDoneDouche': 10,
'count.isNotDoneDouche': 20,
'count.isDoneDetartrage': 30,
'count.isNotDoneDetartrage': 13
Can you help me with this request ?
Thanks in advance
You can conditionally $sum items with the use of the $cond operator for any key that you supply as an _id value:
// Counts, per year/month of `date_entre`, how many documents have each
// `carnetsanitairedone` flag set and how many do not.
// `start` and `end` are variables supplied by the caller.
db.traitement.aggregate([
  // Both bounds go in ONE condition object for the field.
  { "$match": {
    "timestampentre": { "$gt": start, "$lt": end }
  }},
  { "$group": {
    "_id": {
      "year": { "$year": "$date_entre" },
      "month": { "$month": "$date_entre" }
    },
    // Each counter adds 1 when its condition holds, otherwise 0.
    "countIsDoneDouche": {
      "$sum": {
        "$cond": [ "$carnetsanitairedone.isDoneDouche", 1, 0 ]
      }
    },
    "countIsNotDoneDouche": {
      "$sum": {
        "$cond": [
          { "$ne": [ "$carnetsanitairedone.isDoneDouche", true ] },
          1,
          0
        ]
      }
    },
    "countIsDoneDetartrage": {
      "$sum": {
        "$cond": [ "$carnetsanitairedone.isDoneDetartrage", 1, 0 ]
      }
    },
    "countIsNotDoneDetartrage": {
      "$sum": {
        "$cond": [
          { "$ne": [ "$carnetsanitairedone.isDoneDetartrage", true ] },
          1,
          0
        ]
      }
    }
  }}
])
This allows the conditions of the supplied "ternary" in each $cond operation to determine whether the "counter" is incremented for the current value or not.