Finding ranges of continuous values - mongodb

I have the following Mongo collection:
[
{
"key": 1,
"user": "A",
"comment": "commentA1"
},
{
"key": 2,
"user": "A",
"comment": "commentA2"
},
{
"key": 5,
"user": "A",
"comment": "commentA5"
},
{
"key": 2,
"user": "B",
"comment": "commentB2"
},
{
"key": 3,
"user": "B",
"comment": "commentB3"
},
{
"key": 6,
"user": "B",
"comment": "commentB6"
}
]
and I need to find the first continuous keys, with no gaps, per user.
So, for user A I should get the first 2 documents, and for user B the first two also.
The collection might contain more than 2M documents, so the query should work fast.
I have found SQL solutions for this problem (http://www.silota.com/docs/recipes/sql-gap-analysis-missing-values-sequence.html in section number 3), but I am looking for a Mongo solution.
How can I do it in Mongo 4.0 (DocumentDB) ?

EDIT: according to further elaboration on the comments,
One option is:
// For every user, return the documents that belong to the first run of
// consecutive keys (no gaps). Only $arrayElemAt is used to read the first
// array element, because the array form of $first was added in MongoDB 4.4.
// NOTE(review): operator support on DocumentDB itself should be confirmed.
db.collection.aggregate([
  // Ascending key order, so the arrays pushed below are ordered by key.
  {$sort: {key: 1}},
  // One document per user: `data` holds the original items,
  // `shadow` holds every key shifted by +1.
  {$group: {
    _id: "$user",
    data: {$push: {key: "$key", comment: "$comment"}},
    shadow: {$push: {$add: ["$key", 1]}}
  }},
  // Keep only shifted values that are themselves existing keys,
  // i.e. keys whose predecessor (key - 1) also exists for this user.
  {$project: {
    data: 1,
    shadow: {$filter: {input: "$shadow", cond: {$in: ["$$this", "$data.key"]}}}
  }},
  // The first such key minus 1 is the key that starts the first run.
  {$project: {
    data: 1,
    shadow: 1,
    firstItem: {$subtract: [{$arrayElemAt: ["$shadow", 0]}, 1]}
  }},
  // Prepend the run start so `shadow` lists the candidate run members.
  {$project: {data: 1, firstItem: 1, shadow: {$concatArrays: [["$firstItem"], "$shadow"]}}},
  // Keep the element at position i only if it equals firstItem + i,
  // which drops every key that lies behind a gap.
  {$project: {
    data: 1,
    shadow: {$reduce: {
      input: {$range: [0, {$size: "$shadow"}]},
      initialValue: [],
      in: {
        $concatArrays: [
          "$$value",
          {$cond: [
            {$eq: [
              {$arrayElemAt: ["$shadow", "$$this"]},
              {$add: ["$$this", "$firstItem"]}
            ]},
            [{$arrayElemAt: ["$shadow", "$$this"]}],
            []
          ]}
        ]
      }
    }}
  }},
  // Keep only the original items whose key survived in `shadow`.
  {$project: {data: {$filter: {input: "$data", cond: {$in: ["$$this.key", "$shadow"]}}}}},
  // Back to one output document per item; _id is the user.
  {$unwind: "$data"},
  {$project: {comment: "$data.comment", key: "$data.key"}}
])
See how it works on the playground example

Related

Mongodb Aggregations - Group by date including condition

I have a series of documents gathered by aggregation grouping. This is the result for one document:
{
"_id": {
"ip": "79.xxx.xxx.117",
"myDate": "2022-10-19"
},
"date": "2022-10-19",
"allVisitedPages": [
{
"page": "/",
"time": {
"time": "2022-10-19T11:35:44.655Z",
"tz": "-120",
"_id": "634fe1100a011986b7137da0"
}
},
{
"page": "/2",
"time": {
"time": "2022-10-19T12:14:29.536Z",
"tz": "-120",
"_id": "634fea257acb264f23d421f1"
}
},
{
"page": "/",
"time": {
"time": "2022-10-19T15:37:30.002Z",
"tz": "-120",
"_id": "634fea266001ea364eeb38ea"
}
},
],
"visitedPages": 3,
"createdAt": "2022-10-19T11:35:44.920Z"
},
I want to get this (in this case 2 documents as the time difference between array position 2 and 3 is greater than 2 hours):
{
"_id": {
"ip": "79.xxx.xxx.117",
"myDate": "2022-10-19"
},
"date": "2022-10-19",
"allVisitedPages": [
{
"page": "/",
"durationInMinutes": "39",
"time": {
"time": "2022-10-19T11:35:44.655Z",
"tz": "-120",
"_id": "634fe1100a011986b7137da0"
}
},
{
"page": "/2",
"durationInMinutes": "2",
"time": {
"time": "2022-10-19T12:14:29.536Z",
"tz": "-120",
"_id": "634fea257acb264f23d421f1"
}
}
],
"visitedPages": 2,
},
{
"_id": {
"ip": "79.xxx.xxx.117",
"myDate": "2022-10-19"
},
"date": "2022-10-19",
"allVisitedPages": [
{
"page": "/",
"durationInMinutes": "2",
"time": {
"time": "2022-10-19T15:37:30.002Z",
"tz": "-120",
"_id": "634fea266001ea364eeb38ea"
}
},
],
"visitedPages": 1,
},
I want to get a new grouping document if the time between an array position and the following array position is greater than 2 hours. The last array position should always show "2".
I tried $divide and $datediff. But this is not possible on the group stage as it's an unary operator. An approach I tried is to calculate the sum of start and end time by dividing. But how to execute this on an array level on the group stage? Maybe someone could point me in the right direction if possible at all?
You can group and then reduce, but another option is to use $setWindowFields to calculate your grouping index before grouping:
// Split the visits of every ip/date partition into groups: a new group starts
// whenever more than 120 minutes passed since the previous visit.
db.collection.aggregate([
  // Per visit, collect [previous visit time, current visit time]
  // (only the current time for the first visit of a partition).
  {
    $setWindowFields: {
      partitionBy: {$concat: ["$ip", "$date"]},
      sortBy: {"time.time": 1},
      output: {
        visitTimes: {$push: "$time.time", window: {documents: [-1, "current"]}}
      }
    }
  },
  // Minutes between the first and last collected time
  // (0 when only the current visit was collected).
  {
    $set: {
      minutesDiff: {
        $toInt: {
          $dateDiff: {
            startDate: {$first: "$visitTimes"},
            endDate: {$last: "$visitTimes"},
            unit: "minute"
          }
        }
      }
    }
  },
  // 1 marks a visit that opens a new group, 0 a visit that continues one.
  {
    $set: {
      deltaIndex: {$cond: [{$gt: ["$minutesDiff", 120]}, 1, 0]}
    }
  },
  // Running sum of the markers gives the group number; the second window
  // collects [own gap, gap of the following visit].
  {
    $setWindowFields: {
      partitionBy: {$concat: ["$ip", "$date"]},
      sortBy: {"time.time": 1},
      output: {
        groupIndex: {$sum: "$deltaIndex", window: {documents: ["unbounded", "current"]}},
        gapPair: {$push: "$minutesDiff", window: {documents: ["current", 1]}}
      }
    }
  },
  // Duration of a visit is the gap to the following visit when there is one
  // and it is at most 120 minutes; otherwise the fixed value 2.
  {
    $set: {
      duration: {
        $cond: [
          {
            $and: [
              {$eq: [{$size: "$gapPair"}, 2]},
              {$lte: [{$last: "$gapPair"}, 120]}
            ]
          },
          {$last: "$gapPair"},
          2
        ]
      }
    }
  },
  // One document per ip / date / group number.
  {
    $group: {
      _id: {ip: "$ip", myDate: "$date", groupIndex: "$groupIndex"},
      date: {$first: "$date"},
      allVisitedPages: {$push: {page: "$page", time: "$time", duration: "$duration"}},
      visitedPages: {$sum: 1}
    }
  },
  // The group number was only needed for grouping.
  {$unset: "_id.groupIndex"}
])
See how it works on the playground example

how to use $match after $group in mongodb aggregation

I have 4 products. I want to know the count of product-4 for users who have product-1 or product-2.
Sample data:
[
{
"user_id": 1,
"product_type": "product-1"
},
{
"user_id": 1,
"product_type": "product-4"
},
{
"user_id": 1,
"product_type": "product-4"
},
{
"user_id": 2,
"product_type": "product-1"
}
]
user-1 has two product-4 and one product-1 (that counts 2)
user-2 has only product-1, but no product-4 (hence that does not count)
This is how I tried
// Attempt from the question, kept as posted: it does not produce the expected counts.
db.collection.aggregate([
  // Keep only the product-1 / product-2 documents.
  {
    $match: {
      product_type: {
        $in: [
          "product-1",
          "product-2",
        ],
      },
    },
  },
  // One document per user; after this stage only _id is left.
  {
    $group: {
      _id: "$user_id",
    },
  },
  // Cannot work: user_id and product_type no longer exist after the $group,
  // and $in in a query needs an array, not the string "$_id".
  {
    $match: {
      user_id: { $in: "$_id"}, // I want to use $group's result in here
      product_type: "product-4",
    },
  }
]);
Expected results are:
[
{
"_id": 1,
"count": 2
},
{
"_id": 2,
"count": 0
}
]
Note:
I don't have a backend; I have to do this using MongoDB only.
Does this answer your question?
// Count product-4 per user, restricted to users owning product-1 or product-2.
db.collection.aggregate([
  // Gather every product type of a user into one array.
  {
    $group: {
      _id: "$user_id",
      data: {$push: "$product_type"}
    }
  },
  // Keep users whose products overlap with product-1 / product-2.
  {
    $match: {
      $expr: {
        $gt: [
          {$size: {$setIntersection: ["$data", ["product-1", "product-2"]]}},
          0
        ]
      }
    }
  },
  // Number of product-4 entries in the user's array (0 when there are none).
  {
    $project: {
      count: {
        $size: {
          $filter: {
            input: "$data",
            as: "product",
            cond: {$eq: ["$$product", "product-4"]}
          }
        }
      }
    }
  }
])
See how it works on the playground example

How do I group and count values by value range in MongoDB

I have the following documents in my MongoDB:
_id: ObjectId(...)
'timestamp': 2022-11-03T10:00:00.000+00:00
score: 1
_id: ObjectId(...)
'timestamp': 2022-11-03T09:00:00.000+00:00
score: 3
_id: ObjectId(...)
'timestamp': 2022-11-03T10:00:00.000+00:00
score: 6
_id: ObjectId(...)
'timestamp': 2022-11-03T10:00:00.000+00:00
score: 10
I want to make an aggregation that counts the score within the range of (gte)1-(lt)5 as poor, (gte)5-(lt)7 as ok, (gte)7-(lt)8.5 as good and (gte)8.5-(lte)10 as excellent.
So the result would look like this:
{
"data": [
{
"name": "excellent",
"count": 1
},
{
"name": "good",
"count": 0
},
{
"name": "ok",
"count": 1
},
{
"name": "poor",
"count": 2
}
]
}
How do I achieve that?
If you accept an answer only with documents that have a count, you can do:
// Count documents per score category; categories without documents are absent.
// Ranges: poor [1, 5), ok [5, 7), good [7, 8.5), excellent [8.5, 10].
db.collection.aggregate([
  // Replace _id with the category name. Branches are tested in order, so each
  // one only needs its upper bound. Scores are not validated against 1..10.
  {$project: {
    _id: {
      $switch: {
        branches: [
          {case: {$lt: ["$score", 5]}, then: "poor"},
          {case: {$lt: ["$score", 7]}, then: "ok"},
          {case: {$lt: ["$score", 8.5]}, then: "good"}
        ],
        default: "excellent"
      }
    }
  }},
  // One output document per category that occurs.
  {$group: {_id: "$_id", count: {$sum: 1}}}
])
Otherwise you need to create all categories:
// Count documents per score category, always returning all four categories.
// Ranges: poor [1, 5), ok [5, 7), good [7, 8.5), excellent [8.5, 10].
db.collection.aggregate([
  // Single group over the whole collection with one conditional counter per
  // category; the field order here is the order of the final array.
  {$group: {
    _id: 0,
    excellent: {$sum: {$cond: [{$gte: ["$score", 8.5]}, 1, 0]}},
    good: {$sum: {$cond: [{$and: [{$gte: ["$score", 7]}, {$lt: ["$score", 8.5]}]}, 1, 0]}},
    ok: {$sum: {$cond: [{$and: [{$gte: ["$score", 5]}, {$lt: ["$score", 7]}]}, 1, 0]}},
    poor: {$sum: {$cond: [{$lt: ["$score", 5]}, 1, 0]}}
  }},
  // Drop the constant group key so only the counters remain.
  {$unset: "_id"},
  // Turn {excellent: n, ...} into [{k: "excellent", v: n}, ...].
  {$project: {data: {$objectToArray: "$$ROOT"}}},
  // Rename k/v to the requested name/count keys.
  {$project: {
    data: {$map: {
      input: "$data",
      in: {name: "$$this.k", count: "$$this.v"}
    }}
  }}
])
See how it works on the playground example

How to get or return only the matching object from an nested array using mongoose

{
"_id": "6339f99ee18b2481a04b4fe8",
"userId": "60a8a51cf2229813a45d2238",
"array1": [
{
"someId1": "6339f99ee18b2481a04b4fe9",
"customIndex": 2,
"array2": [
{
"someId2": "6339f99ee18b2481a04b4fea",
"startDate": 2022-10-10T19:56:26.000+00:00,
"endDate": 2022-10-12T19:56:26.000+00:00,
}
]
},
{
"someId1": "6345ca40112b743fd8172be0",
"customIndex": 4,
"array2": [
{
"someId2": "6345ca40112b743fd8172be1",
"startDate": 2022-10-10T19:56:26.000+00:00,
"endDate": 2022-10-27T19:56:26.000+00:00,
}
]
}
]
}
I have above structure in mongoDB and want to get only that object from array1 which matches the conditions of endDate > 2022-10-17
Here's what I try to do:
// Find the documents of the given users that have at least one
// array1.array2 element with endDate on or after 2022-10-17.
// The bound must be a Date: a bare 2022-10-17 is integer subtraction (1995).
// find() returns whole documents, so non-matching array1 elements come along.
result= await Collection.find({
userId: { '$in': userIdList},
'array1.array2.endDate': { "$gte": new Date("2022-10-17")}
})
But the above returns both objects from array1, even though the endDate of one object is earlier than 2022-10-17.
How can I get a response like the one below? Also, am I using the right Mongoose calls to achieve what I am trying to achieve?
Expected response that I am trying to achieve:
{
"_id": "6339f99ee18b2481a04b4fe8",
"userId": "60a8a51cf2229813a45d2238",
"array1": [
{
"someId1": "6345ca40112b743fd8172be0",
"customIndex": 4,
"array2": [
{
"someId2": "6345ca40112b743fd8172be1",
"startDate": 2022-10-10T19:56:26.000+00:00,
"endDate": 2022-10-27T19:56:26.000+00:00,
}
]
}
]
}
If array1 can contain several such items, and array2 contain several such items, one option is using $reduce with $filter and $mergeObjects for this:
// Return, for the given users, only the array1 items that still have at least
// one array2 entry with endDate on or after 2022-10-17.
db.collection.aggregate([
  // Restrict to the requested users.
  {$match: {userId: {'$in': userIdList}}},
  // Rebuild array1: every item is copied with its array2 narrowed down to the
  // entries that satisfy the date condition.
  {$project: {
    userId: 1,
    array1: {
      $reduce: {
        input: "$array1",
        initialValue: [],
        in: {$concatArrays: [
          "$$value",
          [{$mergeObjects: [
            "$$this",
            {array2: {
              $filter: {
                input: "$$this.array2",
                as: "innerItem",
                cond: {$gte: [
                  "$$innerItem.endDate",
                  {$dateFromParts: {year: 2022, month: 10, day: 17}}
                ]}
              }
            }}
          ]}]
        ]}
      }
    }
  }},
  // Drop the array1 items whose array2 ended up empty.
  {$project: {
    userId: 1,
    array1: {$filter: {
      input: "$array1",
      cond: {$gt: [{$size: "$$this.array2"}, 0]}
    }}
  }}
])
See how it works on the playground example

Double nested array with multiple nested documents identify wrong value

I need to identify which documents have the wrong date string ( $gt:10 characters) from all my collection :
{
"_id": ObjectId("5c05984246a0201286d4b57a"),
f: "x",
"_a": [
{
"_onlineStore": {}
},
{
"_p": {
"s": {
"a": {
"t": [
{
"dateP": "20200-09-20",
"l": "English",
"size": "XXL"
}
]
},
"c": {
"t": [
{
"dateP": "20300-09-20",
"l": "English",
"size": "XXL"
}
]
}
}
}
}
]
}
and output need to be as follow:
{f:"x",dateP:"20200-09-20", t:"a"}
{f:"x",dateP:"20300-09-20", t:"c"}
The last field in the output "t" not compulsory but desirable ...
Please, help ...
We can use $objectToArray for this:
// List every dateP string longer than 10 characters, together with the
// document's f value and the key under _a._p.s that contains it.
db.collection.aggregate([
  // One document per _a element.
  {$unwind: "$_a"},
  // Turn the keyed sub-documents of _a._p.s into an array of {k, v} pairs.
  {
    $project: {
      _id: 0,
      f: 1,
      section: {$objectToArray: "$_a._p.s"}
    }
  },
  // One document per key, then one per entry of that key's t array.
  {$unwind: "$section"},
  {$unwind: "$section.v.t"},
  // Keep entries whose dateP has more than 10 code points.
  {
    $match: {
      $expr: {
        $gt: [{$strLenCP: "$section.v.t.dateP"}, 10]
      }
    }
  },
  // Final shape: f, the offending dateP, and the key it was found under.
  {
    $project: {
      f: 1,
      dateP: "$section.v.t.dateP",
      t: "$section.k"
    }
  }
])
See how it works on the playground example