How to use Mongo Aggregation to limit results around a given input? - mongodb

I looked through the pipeline stages docs, but did not see how to do this.
Suppose you have a user, and each user has points.
User Points
A 22
B 11
C 15
D 7
So, we use '$sort': { points: -1 } to order the users by points.
Is it possible to use a Mongo Aggregation Stage to find the users before and after a given user?
So, given user C (by id), it would return [A, C, B].

Very interesting question. There may be a better solution.
Disclaimer: I assume each user's points value is unique.
We can use $facet to get the expected result, but at a high cost (the query is very large).
db.collection.aggregate([
{
$facet: {
"givenUser": [
{
$match: {
"user": "C"
}
}
],
"allUser": [
{
$sort: {
"Points": -1
}
}
],
"orderedPoints": [
{
$sort: {
"Points": -1
}
},
{
$group: {
_id: null,
Points: {
$push: "$Points"
}
}
},
{
$unwind: "$Points"
}
]
}
},
{
$project: {
allUser: 1,
currIndex: {
$indexOfArray: [
"$orderedPoints.Points",
{
$arrayElemAt: [
"$givenUser.Points",
0
]
}
]
},
beforeIndex: {
$add: [
{
$indexOfArray: [
"$orderedPoints.Points",
{
$arrayElemAt: [
"$givenUser.Points",
0
]
}
]
},
-1
]
},
afterIndex: {
$add: [
{
$indexOfArray: [
"$orderedPoints.Points",
{
$arrayElemAt: [
"$givenUser.Points",
0
]
}
]
},
1
]
}
}
},
{
$project: {
result: [
{
$arrayElemAt: [
"$allUser",
{
$cond: {
if: {
$lt: [
"$beforeIndex",
0
]
},
then: 999,
else: "$beforeIndex"
}
}
]
},
{
$arrayElemAt: [
"$allUser",
"$currIndex"
]
},
{
$arrayElemAt: [
"$allUser",
"$afterIndex"
]
}
]
}
}
])
[
{
"result": [
{
"Points": 22,
"_id": ObjectId("5a934e000102030405000000"),
"user": "A"
},
{
"Points": 15,
"_id": ObjectId("5a934e000102030405000002"),
"user": "C"
},
{
"Points": 11,
"_id": ObjectId("5a934e000102030405000001"),
"user": "B"
}
]
}
]
MongoPlayground
Steps:
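We keep the following in separate fields:
The given user (C),
All users ordered by points,
All points ordered and stored inside an array (I wish MongoDB allowed finding an array index by object too).
Next, we find the given user's index and calculate the indexes for the "before" / "after" players.
Finally, we create the result with 3 elements (before, current, after).
Note: If the given user is first / last, the before / after item is returned as null. A negative beforeIndex is replaced with the out-of-range index 999, because $arrayElemAt would otherwise treat -1 as the last element of the array.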

Related

MongoDB - Best way to choose a document based on some condition inside switch case

I have a structure where I want to match the value of a field on root level with the value of a field inside another object in the same document and then choose a single document based on some condition from the result. This is the structure I have:
{
"name": "somename",
"level": "123",
"nested":[
{
"somefield": "test",
"file": {
level:"123"
}
},
{
"somefield": "test2",
"file": {
level:"124"
}
},
{
"somefield": "test3",
"file": {
level:"123"
}
}
]
}
After unwinding and matching on a condition (level = nested.file.level), I am left with 2 documents:
[
{
"level": "123",
"name": "somename",
"nested": {
"file": {
"level": "123"
},
"somefield": "test"
}
},
{
"level": "123",
"name": "somename",
"nested": {
"file": {
"level": "123"
},
"somefield": "test3"
}
}
]
Now I want to match on somefield values, this field has 10 different values, these values are in order so if I find a matching document then I will return it or I will go to the next value in the order and check if "somefield": "orderedValue" and so on. So for example:
test
test2
test3
test4
test5
is the order, and if I find a document which has "somefield": "test" I will only return that document; otherwise I will check for "somefield": "test2" and so on until I find a single document which satisfies my condition. This is done in order, so the first document to satisfy the condition is the document I want.
I want to get only 1 document in the end as a result. I thought it would be best to use $switch here and wrote a project stage with $switch.
$project: {
setting: {
$switch: {
branches: [{
'case': {
$eq: [
'$nested.somefield',
'test'
]
},
then: '$nested'
},
{
'case': {
$eq: [
'$nested.somefield',
'test2'
]
},
then: '$nested'
},
{
'case': {
$eq: [
'$nested.somefield',
'test3'
]
},
then: '$nested'
}
],
'default': 'Did not match'
}
}
}
But this won't work as this would be applied on each document and if I have 5 documents with 5 of these values then it will match all of them and return the same array of documents. Any idea on how we can return only the document which matched first?
Solution 1: With $switch operator
3rd stage: $set - Create min field and assign the value based on $switch operator.
4th stage: $sort - Order by min ascending.
5th stage: $limit - Limit to 1 document.
6th stage: $group - Group by $_id. Set the setting field by taking the first document/value with the conditions:
If the min is less than or equal to 5, take nested value.
Else, get "Did not match" value.
db.collection.aggregate([
{
$unwind: "$nested"
},
{
$match: {
$expr: {
$eq: [
"$nested.file.level",
"$level"
]
}
}
},
{
$set: {
min: {
$switch: {
branches: [
{
"case": {
$eq: [
"$nested.somefield",
"test"
]
},
then: 1
},
{
"case": {
$eq: [
"$nested.somefield",
"test2"
]
},
then: 2
},
{
"case": {
$eq: [
"$nested.somefield",
"test3"
]
},
then: 3
},
{
"case": {
$eq: [
"$nested.somefield",
"test4"
]
},
then: 4
},
{
"case": {
$eq: [
"$nested.somefield",
"test5"
]
},
then: 5
}
],
"default": 100
}
}
}
},
{
$sort: {
min: 1
}
},
{
$limit: 1
},
{
$group: {
_id: "$_id",
setting: {
$first: {
$cond: {
if: {
$lte: [
"$min",
5
]
},
then: "$nested",
else: "Did not match"
}
}
}
}
}
])
Demo Solution 1 # Mongo Playground
Solution 2: With $let operator
3rd stage: $set - Create min field. Declare the index variable via $let, holding the array index of nested.somefield within the ordered list of values.
4th stage: $sort - Order by min ascending.
5th stage: $limit - Limit to 1 document.
6th stage: $group - Group by $_id. Set the setting field by taking the first document/value with the conditions:
If the min is less than or equal to 4 (i.e. a valid array index was found), take nested value.
Else, get "Did not match" value.
db.collection.aggregate([
{
$unwind: "$nested"
},
{
$match: {
$expr: {
$eq: [
"$nested.file.level",
"$level"
]
}
}
},
{
$set: {
min: {
$let: {
vars: {
index: {
$indexOfArray: [
[
"test",
"test2",
"test3",
"test4",
"test5"
],
"$nested.somefield"
]
}
},
in: {
$cond: {
if: {
$gt: [
"$$index",
-1
]
},
then: "$$index",
else: 100
}
}
}
}
}
},
{
$sort: {
min: 1
}
},
{
$limit: 1
},
{
$group: {
_id: "$_id",
setting: {
$first: {
$cond: {
if: {
$lte: [
"$min",
4
]
},
then: "$nested",
else: "Did not match"
}
}
}
}
}
])
Demo Solution 2 # Mongo Playground

MongoDB - using $count inside a $facet stage for the calculations

I have a collection and I want to get the total number of documents and use that amount in a $facet to calculate the 95th percentile (in the example below, the duration for each document is defined as finish_time - start_time).
Let's say I have these documents:
[
{
"_id": ObjectId("178768747638364736373637"),
"start_time": ISODate("2019-02-03T12:00:00.000Z"),
"finish_time": ISODate("2019-02-03T12:01:00.000Z")
},
{
"_id": ObjectId("266747364736363536353555"),
"start_time": ISODate("2019-02-03T12:00:00.000Z"),
"finish_time": ISODate("2019-02-03T12:03:00.000Z")
},
{
"_id": ObjectId("367463536453623546353625"),
"start_time": ISODate("2019-02-03T12:00:00.000Z"),
"finish_time": ISODate("2019-02-03T12:08:00.000Z")
}
]
I'm expecting this output:
[
{
"Percentile95Index": 2.8499999999999996,
"_id": ObjectId("178768747638364736373637"),
"duration": 60,
"totalCount": 3
},
{
"Percentile95Index": 2.8499999999999996,
"_id": ObjectId("266747364736363536353555"),
"duration": 180,
"totalCount": 3
},
{
"Percentile95Index": 2.8499999999999996,
"_id": ObjectId("367463536453623546353625"),
"duration": 480,
"totalCount": 3
}
]
So I did this (https://mongoplayground.net/p/xtHLHouzNQr):
db.collection.aggregate([
{
// Since total count will always be a one-document result, I need $facet to run multiple aggregation pipelines and then merge results.
$facet: {
totalCount: [
{
$count: "value"
}
],
pipelineResults: [
{
"$project": {
duration: {
"$divide": [
{
"$subtract": [
"$finish_time",
"$start_time"
]
},
1000
]// duration is in seconds
},
Percentile95Index: {
"$multiply": [
0.95,
"$totalCount.value" // HERE I'D LIKE TO ACCESS THE TOTAL_COUNT!!!!
]
}
}
}
]
}
},
{
$unwind: "$totalCount"
},
{
$unwind: "$pipelineResults"
},
{
$replaceRoot: {
newRoot: {
$mergeObjects: [
"$pipelineResults",
{
totalCount: "$totalCount.value"
}
]
}
}
}
])
As you can see, inside the Percentile95Index field, I'd like to access the totalCount.value but it's not accessible there and I can't figure out how to do it.
Any help?
In my opinion, the $facet stage is not needed for your scenario. (It would be great if you could share the reason for using $facet.)
To count the total documents in the collection and add the count value to each document, you can use $setWindowFields.
$setWindowFields - Render the totalCount field with the value of the total number of documents in the collection.
$project - Decorate the output documents.
db.collection.aggregate([
{
$setWindowFields: {
output: {
totalCount: {
$sum: 1
}
}
}
},
{
"$project": {
totalCount: 1,
duration: {
"$divide": [
{
"$subtract": [
"$finish_time",
"$start_time"
]
},
1000
]
},
Percentile95Index: {
"$multiply": [
0.95,
"$totalCount"
]
}
}
}
])
Demo # MongoPlayground

How can I exclude results that contain a specific element from grouped results?

A: The number of _ids (documents) for each date, grouped by date.
B: The number of documents in A whose details array has elements.
If details has at least one element, count 1; otherwise count 0. For example, a document like the following is counted in A but excluded from B:
{
_id: ObjectId
details: array //no elements
createdAt: Date
}
C: The same count as B, except that documents containing specific details.salesManagerIds values are excluded.
The details.salesManagerIds values to exclude are provided as an array.
For example,
[ObjectId("612f57184205db63a3396a9e"), ObjectId("612cb021278f621a222087d7")]
I made query as follows.
https://mongoplayground.net/p/6sBxAmO_31y
It works as expected up to B. How can I write a query to get C?
If you write and execute a query that can obtain C through the link above, you should get the following result.
[
{
"A": 2,
"B": 1,
"C": 1,
"_id": "2018-05-19"
},
{
"A": 3,
"B": 3,
"C": 1,
"_id": "2018-05-18"
}
]
Use $filter:
db.collection.aggregate([
{
$group: {
_id: {
$dateToString: {
format: "%Y-%m-%d",
date: "$createdAt"
}
},
A: {
$sum: 1
},
B: {
$sum: {
$cond: [
{
$and: [
{
$isArray: "$details"
},
{
$gt: [
{
$size: "$details"
},
0
]
}
]
},
1,
0
]
}
},
C: {
$sum: {
$cond: [
{
$and: [
{
$isArray: "$details"
},
{
$gt: [
{
$size: "$details"
},
0
]
},
{
$gt: [
{
$size: {
$filter: {
input: "$details",
as: "d",
cond: {
$and: [
{
$not: [
{
$in: [
"$$d.salesManagerId",
[
ObjectId("612f57184205db63a3396a9e"),
ObjectId("612cb021278f621a222087d7")
]
]
}
]
}
]
}
}
}
},
0
]
}
]
},
1,
0
]
}
}
}
},
{
$sort: {
_id: -1
}
}
])
mongoplayground

Mongodb: is it possible to do this in one query?

I am new to MongoDB. Here is my document format:
{
"_id": {
"$oid": "5ee023790a0e502e3a9ce9e7"
},
"data": {
"Quick": [
["1591745491", "4", "uwp"],
["1591745492", "4", "uwp"],
["1591745516", "12", "Word"],
["1591747346", "8", "uwp"]
]
"Key": [
["1591747446", "Num"]
]
"Search": [
["1591745491", "tty"],
["1591745492", "erp"],
["1591745516", "Word"],
["1591747346", "uwp"]
]
},
"devicecode": "MP1G5L9EMP1G5L9E#LENOVO"
}
What I want to do is:
group by devicecode
for each group, count how many times they used "Quick", "key" and "Search" (count how many lines are under each name)
Currently I am using a Python program to get this done, but I believe there should be a way to get it done within MongoDB.
The output format should look like this:
devicecode: MP1G5L9EMP1G5L9E#LENOVO, Quick: 400, key: 350, Search: 660
...
You could use aggregation framework to compute the length of individual arrays in the $set stage and then in the $group stage group-by device while summing up the computed array length values from the previous stage. Finally, in the $project stage map _id to devicecode and deselect _id.
db.getCollection("testcollection").aggregate([
{
$set: {
QuickLen: {
$size: {
$ifNull: [
"$data.Quick",
[]
]
}
},
KeyLen: {
$size: {
$ifNull: [
"$data.Key",
[]
]
}
},
SearchLen: {
$size: {
$ifNull: [
"$data.Search",
[]
]
}
}
}
},
{
$group: {
_id: "$devicecode",
Quick: {
$sum: "$QuickLen"
},
key: {
$sum: "$KeyLen"
},
Search: {
$sum: "$SearchLen"
}
}
},
{
$project: {
devicecode: "$_id",
Quick: 1,
key: 1,
Search: 1,
_id: 0
}
}
])

MongoDB 4.2 combine aggregation pipeline update with array filters

I saw that MongoDB 4.2 introduces aggregation pipeline updates, which allows you to set document fields based on other fields in the document.
Considering the following document
{
ean: "12345",
orderedQty: 2,
fulfilledQty: 1,
"status": "pending"
}
I could use the following command to increment the fulfilledQty by 1 and if the orderedQty matches the fulfilledQty set the status accordingly:
db.collection.findOneAndUpdate({}, [
{
"$set": {
"orderedQty": {
"$add": [ "$fulfilledQty", 1 ]
}
},
"$set": {
"status": {
"$cond": {
"if": { "$eq": ["$orderedQty", "$fulfilledQty"] },
"then": "fulfilled",
"else": "pending"
}
}
}
}
])
My question: How would i perform this on an array. Say I have a document like this:
_id: "test",
items: [
{ean: "12345", orderedQty: 2, fulfilledQty: 1, "status": "pending"},
{ean: "67891", orderedQty: 1, fulfilledQty: 1, "status": "fulfilled"}
]
Given I have the params ean = 12345 and an increase value of 1, how could I target the specific array item with EAN 12345, increase the fulfilledQty by 1 and set the status? I want to change only the status field and fulfilledQty field and leave the rest of the items array as is. So the expected outcome would be:
_id: "test",
items: [
{ean: "12345", orderedQty: 2, fulfilledQty: 2, "status": "fulfilled"},
{ean: "67891", orderedQty: 1, fulfilledQty: 1, "status": "fulfilled"}
]
I found the following workflow (works only for MongoDB 4.2+), but it's amazingly verbose…
Given that there are two variables, an item identifier (called ean) and a quantity that was shipped (called fulfilledQty)
collection.update({}, [
{
$set: {
items: {
$map: {
input: "$items",
as: "item",
in: {
$mergeObjects: [
"$$item",
{
fulfilledQty: {
$switch: {
branches: [
{
case: {
$eq: ["$$item.ean", ean]
},
then: {
$toInt: {
$add: ["$$item.fulfilledQty", fulfilledQty]
}
}
}
],
default: "$$item.fulfilledQty"
}
}
}
]
}
}
}
}
},
{
$set: {
items: {
$map: {
input: "$items",
as: "item",
in: {
$mergeObjects: [
"$$item",
{
status: {
$cond: {
if: {
$eq: ["$$item.orderedQty", "$$item.fulfilledQty"]
},
then: "fulfilled",
else: "$$item.status"
}
}
}
]
}
}
}
}
}
]);
I used a switch statement since in my use case I have multiple different EANs. Downside is that I had to use a $map operation, so it always iterates over the whole items array.