How to $push a field depending on a condition? - mongodb

I'm trying to conditionally push a field into an array during the $group stage of the MongoDB aggregation pipeline.
Essentially I have documents with the name of the user, and an array of the actions they performed.
If I group the user actions like this:
{ $group: { _id: { "name": "$user.name" }, "actions": { $push: $action"} } }
I get the following:
[{
"_id": {
"name": "Bob"
},
"actions": ["add", "wait", "subtract"]
}, {
"_id": {
"name": "Susan"
},
"actions": ["add"]
}, {
"_id": {
"name": "Susan"
},
"actions": ["add, subtract"]
}]
So far so good. The idea would be to now group together the actions array to see which set of user actions are the most popular. The problem is that I need to remove the "wait" action before taking into account the group. Therefore the result should be something like this, taking into account that the "wait" element should not be considered in the grouping:
[{
"_id": ["add"],
"total": 1
}, {
"_id": ["add", "subtract"],
"total": 2
}]
Test #1
If I add this $group stage:
{ $group : { _id : "$actions", total: { $sum: 1} }}
I get the count that I want, but it takes into account the unwanted "wait" array element.
[{
"_id": ["add"],
"total": 1
}, {
"_id": ["add", "subtract"],
"total": 1
}, {
"_id": ["add", "wait", "subtract"],
"total": 1
}]
Test #2
{ $group: { _id: { "name": "$user.name" }, "actions": { $push: { $cond: { if:
{ $ne: [ "$action", 'wait']}, then: "$action", else: null } }}} }
{ $group : { _id : "$actions", total: { $sum: 1} }}
This is as close as I've gotten, but this pushes null values where the wait would be, and I can't figure out how to remove them.
[{
"_id": ["add"],
"total": 1
}, {
"_id": ["add", "subtract"],
"total": 1
}, {
"_id": ["add", null, "subtract"],
"total": 1
}]
UPDATE:
My simplified documents look like this:
{
"_id": ObjectID("573e0c6155e2a8f9362fb8ff"),
"user": {
"name": "Bob",
},
"action": "add",
}

You need a preliminary $match stage in your pipeline to select only those documents where "action" is not equals to "wait".
db.collection.aggregate([
{ "$match": { "action": { "$ne": "wait" } } },
{ "$group": {
"_id": "$user.name",
"actions": { "$push": "$action" },
"total": { "$sum": 1 }
}}
])

Related

Find duplicate name in MongoDB

I'm having a problem in getting the duplicate name in my mongodb to delete duplicates.
{
"users": [
{
"_id": {
"$oid": "61441890a6566a001623b8ed"
},
"name": "Jollibee",
},
{
"_id": {
"$oid": "61441890a6566a001623b8ed"
},
"name": "Jollibee",
},
{
"_id": {
"$oid": "61441890a6566a001623b8ed"
},
"name": "MCDO",
},
{
"_id": {
"$oid": "61441890a6566a001623b8ed"
},
"name": "Burger King",
},
]
}
I want to show in my output only the duplicate names. which is Jollibee.
tried this approach but it only returns me the count of all the users not the duplicated ones. I want to show 2 Jollibee only.
db.collection.aggregate([
{
"$unwind": "$users"
},
{
"$group": {
"_id": "$_id",
"count": {
"$sum": 1
}
}
},
{
"$match": {
"_id": {
"$ne": null
},
"count": {
"$gt": 1
}
}
}
])
Suppose the documents are:
[
{
"_id": {
"$oid": "6226dd742ef592186422ad1d"
},
"name": "Stack test"
},
{
"_id": {
"$oid": "6226dd7d2ef592186422ad1e"
},
"name": "Stack test"
},
{
"_id": {
"$oid": "6226dd912ef592186422ad1f"
},
"name": "Stack test 001"
}
]
Aggreagtion Query:
db.users.aggregate(
[
{
$group: {
_id: "$name",
names: {$push: "$name"}
}
}
]
)
Result:
{
_id: 'Stack test',
names: [ 'Stack test', 'Stack test' ]
},
{
_id: 'Stack test 001',
names: [ 'Stack test 001' ]
}
But a better way to do it will be
Aggregation Query:
db.users.aggregate(
[
{
$group: {
_id: "$name",
count: {$sum: 1}
}
}
]
)
Result:
{
_id: 'Stack test',
count: 2
},
{
_id: 'Stack test 001',
count: 1
}
Now, you can iterate through the count and use the name value in _id
since the $unwind step gives you same _id for all documents grouping by _id is not correct. Instead try grouping by users.name
db.collection.aggregate([
{
"$unwind": "$users"
},
{
"$group": {
"_id": "$users.name",
"count": {
"$sum": 1
}
}
},
{
"$match": {
"_id": {
"$ne": null
},
"count": {
"$gt": 1
}
}
}
])
demo

Find documents that share one key but differ in another

I have a mongodb collection that is resembles
{"dept":"A" , "email":"bob#example.com", "userID": "1"}
{"dept":"A" , "email":"bob#example.com", "userID": "1"}
{"dept":"A" , "email":"bob#example.com", "userID": "2"} <<< "bad" record
{"dept":"A" , "email":"alice#example.com", "userID": "3"}
{"dept":"B" , "email":"bob#example.com", "userID": "4"}
{"dept":"B" , "email":"kevin#example.com", "userID": "5"}
The constraint is that an email must only have a single userID per department.
How would I query the table to find which emails have multiple userIDs within a department? Mongo 4.4+
You have to use two $group pipeline stages to filter and find records with multiple entries.
db.collection.aggregate([
{
"$group": {
"_id": {
"dept": "$dept",
"email": "$email",
"userID": "$userID",
},
"individualCount": {
"$sum": 1
}
},
},
{
"$group": {
"_id": "$_id.email",
"userIDs": {
"$addToSet": "$_id.userID"
},
"dept": {
"$addToSet": "$_id.dept"
},
"totalRecordsCount": {
"$sum": "$individualCount"
},
"totalDuplicCounts": {
"$sum": 1
},
},
},
{
"$match": {
"totalDuplicCounts": {
"$gt": 1
}
},
},
])
Mongo Playground Sample Execution

MongoDB: Assign document objects to field in '$project' stage

I have a user collection:
[
{"_id": 1,"name": "John", "age": 25, "valid_user": true}
{"_id": 2, "name": "Bob", "age": 40, "valid_user": false}
{"_id": 3, "name": "Jacob","age": 27,"valid_user": null}
{"_id": 4, "name": "Amelia","age": 29,"valid_user": true}
]
I run a '$facet' stage on this collection. Checkout this MongoPlayground.
I want to talk about the first output from the facet stage. The following is the response currently:
{
"user_by_valid_status": [
{
"_id": false,
"count": 1
},
{
"_id": true,
"count": 2
},
{
"_id": null,
"count": 1
}
]
}
However, I want to restructure the output in this way:
"analytics": {
"invalid_user": {
"_id": false
"count": 1
},
"valid_user": {
"_id": true
"count": 2
},
"user_with_unknown_status": {
"_id": null
"count": 1
}
}
The problem with using a '$project' stage along with 'arrayElemAt' is that the order may not be definite for me to associate an index with an attribute like 'valid_users' or others. Also, it gets further complicated because unlike the sample documents that I have shared, my collection may not always contain all the three categories of users.
Is there some way I can do this?
You can use $switch conditional operator,
$project to show value part in v with _id and count field as object, k to put $switch condition
db.collection.aggregate([
{
"$facet": {
"user_by_valid_status": [
{
"$group": {
"_id": "$valid_user",
"count": { "$sum": 1 }
}
},
{
$project: {
_id: 0,
v: { _id: "$_id", count: "$count" },
k: {
$switch: {
branches: [
{ case: { $eq: ["$_id", null] }, then: "user_with_unknown_status" },
{ case: { $eq: ["$_id", false] }, then: "invalid_user" },
{ case: { $eq: ["$_id", true] }, then: "valid_user" }
]
}
}
}
}
],
"users_above_30": [{ "$match": { "age": { "$gt": 30 } } }]
}
},
$project stage in root, convert user_by_valid_status array to object using $arrayToObject
{
$project: {
analytics: { $arrayToObject: "$user_by_valid_status" },
users_above_30: 1
}
}
])
Playground

Group by date in mongoDB while counting other fields

I've been using MongoDB for just a week and I have problems achieving this result: I want to group my documents by date while also keeping track of the number of entries that have a certain field set to a certain value.
So, my documents look like this:
{
"_id" : ObjectId("5f3f79fc266a891167ca8f65"),
"recipe" : "A",
"timestamp" : ISODate("2020-08-22T09:38:36.306Z")
}
where recipe is either "A", "B" or "C". Right now I'm grouping the documents by date using this pymongo query:
mongo.db.aggregate(
# Pipeline
[
# Stage 1
{
"$project": {
"createdAt": {
"$dateToString": {
"format": "%Y-%m-%d",
"date": "$timestamp"
}
},
"progressivo": 1,
"temperatura_fusione": 1
}
},
# Stage 2
{
"$group": {
"_id": {
"createdAt": "$createdAt"
},
"products": {
"$sum": 1
}
}
},
# Stage 3
{
"$project": {
"label": "$_id.createdAt",
"value": "$products",
"_id": 0
}
}])
Which gives me results like this:
[{"label": "2020-08-22", "value": 1}, {"label": "2020-08-15", "value": 2}, {"label": "2020-08-11", "value": 1}, {"label": "2020-08-21", "value": 5}]
What I'd like to have is also the counting of how many times each recipe appears on every date. So, if for example on August 21 I have 2 entries with the "A" recipe, 3 with the "B" recipe and 0 with the "C" recipe, the desired output would be
{"label": "2020-08-21", "value": 5, "A": 2, "B":3, "C":0}
Do you have any tips?
Thank you!
You can do like following, what have you done is excellent. After that,
In second grouping, We just get total value and value of each recipe.
$map is used to go through/modify each objects
$arrayToObject is used to covert the array what we have done via map (key : value pair) to object
$ifNull is used for, sometimes your data might not have "A" or "B" or "C". But you need the value should be 0 if there is no name as expected output.
Here is the code
[
{
"$project": {
"createdAt": {
"$dateToString": {
"format": "%Y-%m-%d",
"date": "$timestamp"
}
},
recipe: 1,
"progressivo": 1,
"temperatura_fusione": 1
}
},
{
"$group": {
"_id": {
"createdAt": "$createdAt",
"recipeName": "$recipe",
},
"products": {
$sum: 1
}
}
},
{
"$group": {
"_id": "$_id.createdAt",
value: {
$sum: "$products"
},
recipes: {
$push: {
name: "$_id.recipeName",
val: "$products"
}
}
}
},
{
$project: {
"content": {
"$arrayToObject": {
"$map": {
"input": "$recipes",
"as": "el",
"in": {
"k": "$$el.name",
"v": "$$el.val"
}
}
}
},
value: 1
}
},
{
$project: {
_id: 1,
value: 1,
A: {
$ifNull: [
"$content.A",
0
]
},
B: {
$ifNull: [
"$content.B",
0
]
},
C: {
$ifNull: [
"$content.C",
0
]
}
}
}
]
Working Mongo playground

How to get count by order in mongodb aggregate?

I have two collections name listings and moods.
listings sample:
{
"_id": ObjectId("5349b4ddd2781d08c09890f3"),
"name": "Hotel Radisson Blu",
"moods": [
ObjectId("507f1f77bcf86cd799439010"),
ObjectId("507f1f77bcf86cd799439011")
]
}
moods sample:
{
"_id": ObjectId("507f1f77bcf86cd799439011"),
"name": "Sports"
},
{
"_id": ObjectId("507f1f77bcf86cd799439010"),
"name": "Spanish Food"
},
{
"_id": ObjectId("507f1f77bcf86cd799439009"),
"name": "Action"
}
I need this record.
{
"_id": ObjectId("507f1f77bcf86cd799439011"),
"name": "Sports",
"count": 1
},
{
"_id": ObjectId("507f1f77bcf86cd799439010"),
"name": "Spanish Food",
"count": 1
},
{
"_id": ObjectId("507f1f77bcf86cd799439009"),
"name": "Action",
"count": 0
}
I need this type of record. I have no idea about aggregate.
You can do it using aggregate(),
$lookup to join collection listings
$match pipeline to check moods _id in listings field moods array
db.moods.aggregate([
{
"$lookup": {
"from": "listings",
"as": "count",
let: { id: "$_id" },
pipeline: [
{
"$match": {
"$expr": { "$in": ["$$id", "$moods"] }
}
}
]
}
},
$addFields to add count on the base of $size of array count that we got from above lookup
{
$addFields: {
count: { $size: "$count" }
}
}
])
Playground
did this work:
db.collection.aggrate().count()
Try to combine the functions, it might work.