Mongodb query to get count of field based on the value for a matching string - mongodb

I have the following Mongodb document.
{
"_id" : ObjectId("62406bfaa1d66f8d99c6e97d"),
"skill": "Programming Language"
"supply" : [
{
"employeeName" : "A1",
"skillRating" : 3
},
{
"employeeName" : "A2",
"skillRating" : 4
},
{
"employeeName" : "A3",
"skillRating" : 4
},
{
"employeeName" : "A4",
"skillRating" : 4
},
{
"employeeName" : "A5",
"skillRating" : 3
},
{
"employeeName" : "A6",
"skillRating" : 4
},
{
"employeeName" : "A7",
"skillRating" : 2
},
{
"employeeName" : "A8",
"skillRating" : 2
},
{
"employeeName" : "A9",
"skillRating" : 4
},
{
"employeeName" : "A10",
"skillRating" : 3
},
{
"employeeName" : "A11",
"skillRating" : 3
},
{
"employeeName" : "A12",
"skillRating" : 3
},
{
"employeeName" : "A13",
"skillRating" : 2
},
{
"employeeName" : "A14",
"skillRating" : 4
},
{
"employeeName" : "A15",
"skillRating" : 4
}
]
}
How can I write a Mongodb query to produce the following output (i.e.: Get the count of occurrence of each value for a matching skill)
{
skillName : "Programming Language",
skillRating1: 0, <-- Count of skillRating with value 1
skillRating2: 3, <-- Count of skillRating with value 2
skillRating3: 5, <-- Count of skillRating with value 3
skillRating4: 7, <-- Count of skillRating with value 4
skillRating5: 0 <-- Count of skillRating with value 5
}
[Note: I am learning to write Mongodb queries]

You can go with aggregation,
$unwind to deconstruct the array
$group to get the sum of avg by _id and the avg
$arrayToObject to make the field to object with the help of $concat. Because we need the skillRating1,skillRating2...
$replaceRoot to get the object to root document
$project to decide whether to show or not
Here is the code,
db.collection.aggregate([
{ "$unwind": "$supply" },
{
"$group": {
"_id": { _id: "$_id", avg: "$supply.avgSkillRating" },
"count": { "$sum": 1 },
"skill": { "$first": "$skill" }
}
},
{
"$group": {
"_id": "$_id._id",
"skill": { "$first": "$skill" },
"data": {
$push: {
k: {
$concat: [ "avgSkillRating", { $toString: "$_id.avg" } ]
},
v: "$count"
}
}
}
},
{ "$addFields": { "data": { "$arrayToObject": "$data" } } },
{
"$replaceRoot": {
"newRoot": { "$mergeObjects": [ "$$ROOT", "$data" ] }
}
},
{ "$project": { data: 0 } }
])
Working Mongo playground

Maybe something like this:
db.collection.aggregate([
{
$unwind: "$supply"
},
{
$group: {
_id: "$supply.avgSkillRating",
cnt: {
$push: "$supply.avgSkillRating"
},
skill: {
$first: "$skill"
}
}
},
{
$project: {
z: [
{
"k": {
"$concat": [
"avgSkillRating",
{
$toString: "$_id"
}
]
},
"v": {
$size: "$cnt"
}
}
],
skill: 1
}
},
{
$replaceRoot: {
newRoot: {
"$mergeObjects": [
{
"$arrayToObject": "$z"
},
{
skillName: "$skill"
}
]
}
}
},
{
$group: {
_id: "$skillName",
x: {
$push: "$$ROOT"
}
}
},
{
"$replaceRoot": {
"newRoot": {"$mergeObjects": "$x"}
}
}
])
Explained:
Unwind the supply array
group avgSkillRating to array cnt ( to be possible to count )
form z array with k,v suitable for arrayToObject
mergeObjects to form the keys and values
group to join the objects and leave only single skillName
replace the root document with the newly formed document with the necesary details.
playground

Here's another version that also reports skillRatings with a zero count. This aggregation pipeline is essentially identical to #varman's answer and adds a complex (to me anyway) "$set"/"$map" to create the extra fields.
db.collection.aggregate([
{
"$unwind": "$supply"
},
{
"$group": {
"_id": { "_id": "$_id", "avg": "$supply.avgSkillRating" },
"count": { "$count": {} },
"skillName": { "$first": "$skill" }
}
},
{
"$group": {
"_id": "$_id._id",
"skillName": { "$first": "$skillName" },
"data": {
"$push": {
"_r": "$_id.avg",
"k": { $concat: [ "skillRating", { $toString: "$_id.avg" } ] },
v: "$count"
}
}
}
},
{
"$set": {
"data": {
"$map": {
"input": { "$range": [ 1, 6 ] },
"as": "rate",
"in": {
"$let": {
"vars": {
"idx": { "$indexOfArray": [ "$data._r", "$$rate" ] }
},
"in": {
"$cond": [
{ "$gte": [ "$$idx", 0 ] },
{
"k": {
"$getField": {
"field": "k",
"input": { "$arrayElemAt": [ "$data", "$$idx" ] }
}
},
"v": {
"$getField": {
"field": "v",
"input": { "$arrayElemAt": [ "$data", "$$idx" ] }
}
}
},
{
"k": { $concat: [ "skillRating", { $toString: "$$rate" } ] },
"v": 0
}
]
}
}
}
}
}
}
},
{ "$set": { "data": { "$arrayToObject": "$data" } } },
{ "$replaceWith": { "$mergeObjects": [ "$$ROOT", "$data" ] } },
{ "$unset": [ "data", "_id" ] }
])
Try it mongoplayground.net.

Related

Simple MongoDB Aggregation

I'm a bit confused on how to group using aggregation but still be able to extract specific values from arrays:
db.collection.aggregate([
{ "$unwind": f"${stat_type}" },
{
"$group": {
"_id": "$userId",
"value" : { "$max" : f"${stat_type}.stat_value" },
"character" : f"${stat_type}.character_name", <-- how do I extract this value that matches where the $max from above is grabbed.
}
},
{ "$sort": { "value": -1 }},
{ '$limit' : 30 }
])
Sample Entries:
{
'name' : "Tony",
'userId' : 12345,
'damage_dealt' : [
"character_name" : "James",
"stat_value" : 100243
]
}
{
'name' : "Jimmy",
'userId' : 12346,
'damage_dealt' : [
"character_name" : "James",
"stat_value" : 1020243
]
}
{
'name' : "Tony",
'userId' : 12345,
'damage_dealt' : [
"character_name" : "Lebron",
"stat_value" : 99900243
]
}
A sample output for what I'm looking for is below:
[
{
'_id':12345,
'user' : 'Tony'
'character_name' : 'Lebron',
'stat_value' : 99900243
},
{
'_id':12346,
'user' : 'Jimmy'
'character_name' : 'James',
'stat_value' : 1020243
}
]
You can use the $top accumulator to achieve the desired result. Like this:
db.collection.aggregate([
{
"$unwind": "$damage_dealt"
},
{
"$group": {
"_id": "$userId",
"value": {
$top: {
output: {
character_name: "$damage_dealt.character_name",
stat_value: "$damage_dealt.stat_value"
},
sortBy: {
"damage_dealt.stat_value": -1
}
}
},
}
},
{
"$project": {
character_name: "$value.character_name",
stat_value: "$value.stat_value"
}
},
{
"$sort": {
"stat_value": -1
}
},
{
"$limit": 30
}
])
Playground link.
Or collects all the group elements in an array, and the max stat_value, then pick the object from the array containing the max stat_value.
db.collection.aggregate([
{
"$unwind": "$damage_dealt"
},
{
"$group": {
"_id": "$userId",
"max_stat": {
"$max": "$damage_dealt.stat_value"
},
"damages": {
"$push": {
name: "$name",
damage_value: "$damage_dealt"
}
}
}
},
{
"$project": {
"damages": {
"$arrayElemAt": [
{
"$filter": {
"input": "$damages",
"as": "damage",
"cond": {
"$eq": [
"$$damage.damage_value.stat_value",
"$max_stat"
]
}
}
},
0
]
}
}
},
{
"$project": {
"character_name": "$damages.damage_value.character_name",
"stat_value": "$damages.damage_value.stat_value",
"name": "$damages.name"
}
},
{
"$sort": {
"stat_value": -1
}
},
{
"$limit": 30
}
])
Playground link.
Here's another way you could do it.
db.collection.aggregate([
{
"$group": {
"_id": "$userId",
"user": {"$first": "$name"},
"damage_dealts": {"$push": "$damage_dealt"},
"maxStat": {"$max": {"$first": "$damage_dealt.stat_value"}}
}
},
{
"$set": {
"outChar": {
"$first": {
"$arrayElemAt": [
"$damage_dealts",
{"$indexOfArray": ["$damage_dealts.stat_value", "$maxStat"]}
]
}
}
}
},
{
"$project": {
"user": 1,
"character_name": "$outChar.character_name",
"stat_value": "$outChar.stat_value"
}
},
{"$sort": {"stat_value": -1}},
{"$limit": 30}
])
Try it on mongoplayground.net.

MongoDB : group and count users by gender, civilStatus and professionalCategory

I have a collection of users, each user has a profile. I want to implement a query to make statistics on users.
This is my collection.
[
{
"_id": ObjectId("61d2db0d273a9076d630697b"),
"state": "VALIDATED",
"phone": "xxx",
"civilStatus": "SINGLE",
"gender": "MALE",
"professionalCategory": "STUDENT"
}
]
I want the result to contain an array of all genders of users in the database, and the number of users with each gender. same for civilStatus and professionalCategories
This is the result i am looking for :
{
"total": 2000
"validated": 1800,
"genders": [
{
"value": "MALE",
"count": 1200
},
{
"value": "FEMALE",
"count": 600
}
],
"civilStatus": [
{
"value": "SINGLE",
"count": "300"
}
...
],
"professionalCategories": [
{
"value": "STUDENT",
"count": "250"
}
...
]
}
I implemented the query, but I still have a few things that I don't know how to do.
db.getCollection("users").aggregate([
{
$group: {
_id: null,
validated: {
$sum: {
$cond: {
if: { $eq: ["$state", "VALIDATED"] },
then: 1,
else: 0
}
}
},
genders: {
$push: "$gender"
},
civilStatus: {
$push: "$civilStatus"
},
professionalCategories: {
$push: "$professionalCategory"
}
}
}
])
This is the result of this query :
{
"total": 2000
"validated": 1800,
"genders": [
"MALE",
"MALE",
"FEMALE",
"MALE",
"FEMALE",
"FEMALE"
...
],
"civilStatus": [
"SINGLE",
"MARIED",
"SINGLE",
...
],
"professionalCategories": [
"STUDENT",
"WORKER",
"RETIRED"
...
]
}
I miss how to group each gender, civil Status and professional Category and calculate the number of users for each one.
I also tried this query, but I don't know how to complete the "count" field for each item of the array :
db.getCollection("users").aggregate([
{
$group: {
_id: null,
validated: {
$sum: {
$cond: {
if: { $eq: ["$state", "VALIDATED"] },
then: 1,
else: 0
}
}
},
genders: {
$addToSet: {
value: "$gender",
count: {
//
}
}
},
civilStatus: {
$addToSet: {
value: "$civilStatus",
count: {
//
}
}
},
professionalCategories: {
$addToSet: {
value: "$professionalCategory",
count: {
//
}
}
},
}
}
])
if the query was to treat only one field, for example gender. it would have been easier with "unwind". but here I have 3 fields.
can someone help me please?
You can use following aggregation
Here is the code
db.collection.aggregate([
{
"$facet": {
"genders": [
{
"$group": {
"_id": "$gender",
"total": { $sum: 1 }
}
}
],
"civilStatus": [
{
"$group": {
"_id": "$civilStatus",
"total": { $sum: 1 }
}
}
],
"professionalCategory": [
{
"$group": {
"_id": "$professionalCategory",
"total": { $sum: 1 }
}
}
],
"validated": [
{
"$group": {
"_id": "$state",
"total": { "$sum": 1 }
}
}
]
}
},
{
$set: {
validated: {
"$filter": {
"input": "$validated",
"cond": {
"$eq": [ "$$this._id", "VALIDATED" ]
}
}
}
}
},
{
$set: {
validated: {
"$ifNull": [
{
"$arrayElemAt": [ "$validated", 0 ]
},
0
]
}
}
},
{
$set: { validated: "$validated.total" }
}
])
Working Mongo playground

Fill day gaps of two-dimensional timeseries data in MongoDB with aggregate

I have a collection of two-dimensional timeseries data as follows:
[
{
"value" : 9,
"timestamp" : "2020-12-30T02:06:33.000+0000",
"recipeId" : 15
},
{
"value" : 2,
"timestamp" : "2020-12-30T12:04:23.000+0000",
"recipeId" : 102
},
{
"value" : 5,
"timestamp" : "2020-12-30T15:09:23.000+0000",
"recipeId" : 102
},
...
]
The records have a recipeId which is the first level of grouping I'm looking for. All values for a day of a recipe should be summed up. I want an array of timeseries per recipeId. I need the missing days to be filled with a 0. I want this construct to be created for a provided start and end date range.
Some like this for date range of 2020-12-29 to 2020-12-31:
[
[
{
"sum" : 0,
"timestamp" : "2020-12-29",
"recipeId" : 15
},
{
"sum" : 9,
"timestamp" : "2020-12-30",
"recipeId" : 15
},
{
"sum" : 0,
"timestamp" : "2020-12-31",
"recipeId" : 15
},
...
],
[
{
"sum" : 0,
"timestamp" : "2020-12-29",
"recipeId" : 0
},
{
"sum" : 7,
"timestamp" : "2020-12-30",
"recipeId" : 102
},
{
"sum" : 0,
"timestamp" : "2020-12-31",
"recipeId" : 102
},
...
]
]
This is what I currently have and it's only partially solving my requirements. I can't manage to get the last few stages right:
[
{
"$match": {
"timestamp": {
"$gte": "2020-12-29T00:00:00.000Z",
"$lte": "2020-12-31T00:00:00.000Z"
}
}
},
{
"$addFields": {
"timestamp": {
"$dateFromParts": {
"year": { "$year": "$timestamp" },
"month": { "$month": "$timestamp" },
"day": { "$dayOfMonth": "$timestamp" }
}
},
"dateRange": {
"$map": {
"input": {
"$range": [
0,
{
"$trunc": {
"$divide": [
{
"$subtract": [
"2020-12-31T00:00:00.000Z",
"2020-12-29T00:00:00.000Z"
]
},
1000
]
}
},
86400
]
},
"in": {
"$add": [
"2020-12-29T00:00:00.000Z",
{ "$multiply": ["$$this", 1000] }
]
}
}
}
}
},
{ "$unwind": "$dateRange" },
{
"$group": {
"_id": { "date": "$dateRange", "recipeId": "$recipeId" },
"count": {
"$sum": { "$cond": [{ "$eq": ["$dateRange", "$timestamp"] }, 1, 0] }
}
}
},
{
"$group": {
"_id": "$_id.date",
"total": { "$sum": "$count" },
"byRecipeId": {
"$push": {
"k": { "$toString": "$_id.recipeId" },
"v": { "$sum": "$count" }
}
}
}
},
{ "$sort": { "_id": 1 } },
{
"$project": {
"_id": 0,
"timestamp": "$_id",
"total": "$total",
"byRecipeId": {
"$arrayToObject": {
"$filter": { "input": "$byRecipeId", "cond": "$$this.v" }
}
}
}
}
]
which results in:
[
{
"timestamp": "2020-12-29T00:00:00.000Z",
"total": 21,
"byRecipeId": {}
},
{
"timestamp": "2020-12-30T00:00:00.000Z",
"total": 0,
"byRecipeId": {
"15": 9,
"102": 7
}
},
{
"timestamp": "2020-12-31T00:00:00.000Z",
"total": 0,
"byRecipeId": {}
}
]
I'm open to alternative solution of course. For examples I came across this post: https://medium.com/#alexandro.ramr777/fill-missing-values-using-mongodb-aggregation-framework-f011114e83e0 but it doesn't deal with multi-dimensions.
You could use the $redcue function. This code fills the gabs of Minutes for current day. Should be easy to adapt it to give missing Days.
{
$addFields: {
data: {
$reduce: {
input: { $range: [0, 24 * 60] },
initialValue: [],
in: {
$let: {
vars: {
ts: {
$add: [
moment().startOf('day').toDate(),
{ $multiply: ["$$this", 1000 * 60] }
]
}
},
in: {
$concatArrays: [
"$$value",
[{
$cond: {
if: { $in: ["$$ts", "$data.timestamp"] },
then: {
$first: {
$filter: {
input: "$data",
cond: { $eq: ["$$this.timestamp", "$$ts"] }
}
}
},
else: { timestamp: "$$ts", total: 0 }
}
}]
]
}
}
}
}
}
}
}
In my opinion, $reduce is more elegant than $map, however based on my experience the performance is much worse with $reduce.

MongoDB query subdocument for records that don't match criteria

I currently have the following query:
db.getCollection('conversations').aggregate([
{
$lookup: {
foreignField: "c_ID",
from: "messages",
localField: "_id",
as: "messages"
}
},
{
"$unwind": "$messages"
},
{
"$sort": {
"messages.t": -1
}
},
{
"$group": {
"_id": "$_id",
"lastMessage": {
"$first": "$messages"
},
"allFields": {
"$first": "$$ROOT"
}
}
},
{
"$replaceRoot": {
"newRoot": {
"$mergeObjects": [
"$allFields",
{
"lastMessage": "$lastMessage"
}
]
}
}
},
{
$project: {
messages: 0
}
},
{
$match: {
"members.uID": "1",
//"lastMessage.t": { $gt: ISODate("2020-02-04 20:38:02.154Z") }
}
},
{
$sort: { "lastMessage.t": 1 }
},
{
$limit: 10
},
{
$project: {
members: {
$slice: [ {
$filter: {
input : "$members", as : "member", cond : {
$ne : ["$$member.uID" , "1"]
}
}
}, 3 ]
}
}
},
])
However, I also have a field for each member, named "l", which contains a timestamp. It means someone has left a conversation and thus represents the leave date. I don't want anyone who left before the current timestamp (e.g. 1582056056) to be included in the members list. How can I do this?
EDIT:
conversations document
{
"_id" : ObjectId("5e35f2c840713a43aeeeb3d9"),
"members" : [
{
"uID" : "1",
"j" : 1580580922
},
{
"uID" : "4",
"j" : 1580580922,
ā€œlā€: 1580581982
},
{
"uID" : "5",
"j" : 1580580922
}
]
}
messages document
{
"_id" : ObjectId("5e35ee5f40713a43aeeeb1c5"),
"c_ID" : ObjectId("5e35f2c840713a43aeeeb3d9"),
"fromID" : "1",
"msg" : "What's up?",
"t" : 1580591922,
"d" : {
"4" : 1580592039
},
"r" : {
"4" : 1580592339
}
}
We can exclude them during $filter stage with $and and $or operators.
member.uID != 1 && (member.l == undefined || lastMessage.t < member.l)
Take a look query below.
db.conversations.aggregate([
{
$lookup: {
from: "messages",
foreignField: "c_ID",
localField: "_id",
as: "messages"
}
},
{
"$unwind": "$messages"
},
{
"$sort": {
"messages.t": -1
}
},
{
"$group": {
"_id": "$_id",
"lastMessage": {
"$first": "$messages"
},
"allFields": {
"$first": "$$ROOT"
}
}
},
{
"$replaceRoot": {
"newRoot": {
"$mergeObjects": [
"$allFields",
{
"lastMessage": "$lastMessage"
}
]
}
}
},
{
$project: {
messages: 0
}
},
{
$match: {
"members.uID": "1"
}
},
{
$sort: {
"lastMessage.t": 1
}
},
{
$limit: 10
},
{
$project: {
members: {
$slice: [
{
$filter: {
input: "$members",
as: "member",
cond: {
$and: [
{
$ne: [
"$$member.uID",
"1"
]
},
{
$or: [
{
$eq: [
"$$member.l",
undefined
]
},
{
$lt: [
"$lastMessage.t",
"$$member.l"
]
}
]
}
]
}
}
},
3
]
}
}
}
])
MongoPlayground

Query to get a value by subtracting a value from current and next document

I have a mongo db collection like below,
{
"id": ObjectId("132456"),
reading :[
{
"weight" : {
"measurement" : 82.0,
"unit" : "kg"
}
}
],
"date" : ISODate("2018-09-12T11:45:08.174Z")
},
{
"id": ObjectId("132457"),
reading :[
{
"weight" : {
"measurement" : 80.0,
"unit" : "kg"
}
}
],
"date" : ISODate("2018-09-12T10:45:08.174Z")
},
{
"id": ObjectId("132458"),
reading :[
{
"weight" : {
"measurement" : 85.0,
"unit" : "kg"
}
}
],
"date" : ISODate("2018-09-11T09:45:08.174Z")
}
I need a mongo db query that will give me the current weight and the weight difference between the current and next record.
Example output below,
{
"id": ObjectId("132456"),
"currentWeight": 75.0,
"weightDifference": 2.0,
"date" : ISODate("2018-09-12T11:45:08.174Z")
},
{
"id": ObjectId("132457"),
"currentWeight": 80.0,
"weightDifference": -5.0,
"date" : ISODate("2018-09-12T10:45:08.174Z")
}
I was not able to get the weight from next document to subtract the weight from current document.
Thanks in advance for your help
My try for the above problem,
db.measurementCollection.aggregate([
{
$match : { "date" : { $gte : new ISODate("2018-09-01T00:00:00.000Z") , $lte : new ISODate("2018-09-12T23:59:59.000Z") } }
},
{
$project : { "date" : 1 ,
"currentWeight" : {$arrayElemAt: [ "$reading.weight.measurement", 0 ]}
},
{ $sort: {"date":-1} },
{
$addFields : {
"weigtDifference" :
{
{
$limit: 2
},
{
$group: {
_id: null,
'count1': {$first: '$currentWeight'},
'count2': {$last: '$currentWeight'}
}
},
{
$subtract: ['$count1', '$count2']
}
}
}
}
])
You can try below aggregation but I will not recommend you to use this with the large data set.
db.collection.aggregate([
{ "$match": {
"date" : {
"$gte": new ISODate("2018-09-01T00:00:00.000Z"),
"$lte": new ISODate("2018-09-12T23:59:59.000Z")
}
}},
{ "$unwind": "$reading" },
{ "$sort": { "date": -1 }},
{ "$group": { "_id": null, "data": { "$push": "$$ROOT" }}},
{ "$project": {
"data": {
"$filter": {
"input": {
"$map": {
"input": { "$range": [0, { "$size": "$data" }] },
"as": "tt",
"in": {
"$let": {
"vars": {
"first": { "$arrayElemAt": ["$data", "$$tt"] },
"second": { "$arrayElemAt": ["$data", { "$add": ["$$tt", 1] }] }
},
"in": {
"currentWeight": "$$first.reading.weight.measurement",
"weightDifference": { "$subtract": ["$$second.reading.weight.measurement", "$$first.reading.weight.measurement"] },
"_id": "$$first._id",
"date": "$$first.date"
}
}
}
}
},
"cond": { "$ne": ["$$this.weightDifference", null] }
}
}
}
},
{ "$unwind": "$data" },
{ "$replaceRoot": { "newRoot": "$data" }}
])