mongo query: add a new field with ranking number based on another field - mongodb

I am new to mongo queries. Currently I have a collection like this, which is used to create a d3 force-directed graph.
{
"_id": "allesgute3",
"nodes": [{
"id": "bmw#gmail.com",
"count": 15,
"nodeUpdatetime": 1525341732
}, {
"id": "abc#gmail.com",
"count": 10,
"nodeUpdatetime": null
}, {
"id": "xyz#gmail.com",
"count": 8,
"nodeUpdatetime": 1525408742
}, {
"id": "wilson#gmail.com",
"count": 4,
"nodeUpdatetime": 1525423847
}, {
"id": "niv#gmail.com",
"count": 6,
"nodeUpdatetime": 1525447758
}, {
"id": "car#gmail.com",
"count": 9,
"nodeUpdatetime": 1525447763
},
{
"id": "jason#gmail.com",
"count": 1,
"nodeUpdatetime": 1525447783
}
],
"links": [{
"source": "bmw#gmail.com",
"target": "jason#gmail.com",
"timestamp": 1525312111
}, {
"source": "car#gmail.com",
"target": "jason#gmail.com",
"timestamp": 1525334013
}, {
"source": "bmw#gmail.com",
"target": "car#gmail.com",
"timestamp": 1525334118
}]
}
Using a mongo query, I would like to generate the output to something like this. Basically for the nested data under "nodes", add a new field called "topn" and rank them by count from 1 to 5. The remainder values are null. Can anyone help? Thank you!
{
"_id": "allesgute3",
"nodes": [{
"id": "bmw#gmail.com",
"count": 15,
"nodeUpdatetime": 1525341732,
"topn": 1
}, {
"id": "abc#gmail.com",
"count": 10,
"nodeUpdatetime": null,
"topn": 2
}, {
"id": "xyz#gmail.com",
"count": 8,
"nodeUpdatetime": 1525408742,
"topn": 4
}, {
"id": "wilson#gmail.com",
"count": 4,
"nodeUpdatetime": 1525423847,
"topn": null
}, {
"id": "niv#gmail.com",
"count": 6,
"nodeUpdatetime": 1525447758,
"topn": 5
}, {
"id": "car#gmail.com",
"count": 9,
"nodeUpdatetime": 1525447763,
"topn": 3
},
..............

The following should get you what you want:
// Adds a "topn" rank (1..5 by descending "count") to every element of "nodes";
// elements beyond the fifth get topn: null. The resulting "nodes" array is
// ordered by descending "count".
db.collection.aggregate([
    // Flatten the "nodes" array: one document per node.
    { $unwind: "$nodes" },
    // Sort descending by "count" so the highest count comes first.
    { $sort: { "nodes.count": -1 } },
    {
        // Create the original structure again - just with sorted array elements.
        $group: {
            _id: "$_id",
            nodes: { $push: "$nodes" },
            // $group only keeps the fields it names, so "links" has to be carried
            // over explicitly or it would be lost from the result.
            links: { $first: "$links" }
        }
    },
    {
        $addFields: {
            "nodes": {
                $zip: { // zip two arrays together
                    inputs: [
                        "$nodes", // the first one being the existing and now sorted "nodes" array
                        { $range: [ 1, 6 ] } // and the second one being [ 1, 2, 3, 4, 5 ]
                    ],
                    useLongestLength: true // do not stop after five elements but instead continue using a "null" value
                }
            }
        }
    },
    {
        $addFields: {
            "nodes": {
                $map: { // transform the "nodes" array of [ node, rank ] pairs
                    input: "$nodes",
                    as: "this",
                    in: {
                        $mergeObjects: [ // by merging two objects
                            { $arrayElemAt: [ "$$this", 0 ] }, // the first sits at array position 0
                            {
                                // the second is a new object with a "topn" field holding
                                // the second element of the pair (the rank, or null)
                                topn: { $arrayElemAt: [ "$$this", 1 ] }
                            }
                        ]
                    }
                }
            }
        }
    }
])

Related

Query maximum N records of each group base on a condition in MongoDB?

I have a question regarding querying data in MongoDB. Here is my sample data:
{
"_id": 1,
"category": "fruit",
"userId": 1,
"name": "Banana"
},
{
"_id": 2,
"category": "fruit",
"userId": 2,
"name": "Apple"
},
{
"_id": 3,
"category": "fresh-food",
"userId": 1,
"name": "Fish"
},
{
"_id": 4,
"category": "fresh-food",
"userId": 2,
"name": "Shrimp"
},
{
"_id": 5,
"category": "vegetable",
"userId": 1,
"name": "Salad"
},
{
"_id": 6,
"category": "vegetable",
"userId": 2,
"name": "carrot"
}
The requirements:
If the category is fruit, return all the matching records.
If the category is NOT fruit, return a maximum of 10 records of each category, grouped by user.
The categories are known and stable, so we can hard-code them in our query.
I want to get it done in a single query. So the result expected should be:
{
"fruit": [
... // All records of
],
"fresh-food": [
{
"userId": 1,
"data": [
// Top 10 records of user 1 with category = "fresh-food"
]
},
{
"userId": 2,
"data": [
// Top 10 records of user 2 with category = "fresh-food"
]
},
...
],
"vegetable": [
{
"userId": 1,
"data": [
// Top 10 records of user 1 with category = "vegetable"
]
},
{
"userId": 2,
"data": [
// Top 10 records of user 2 with category = "vegetable"
]
},
]
}
I've found the guideline to group by each group using $group and $slice, but I can't apply the requirement number #1.
Any help would be appreciated.
You need to use aggregation for this
$facet to split the incoming data into two categories: 1. fruit and 2. non_fruit
$match to apply the condition for each category
$group twice: the first groups the data by category and user, the second groups by category only
$arrayToObject to turn the array of key/value pairs into a single object
$replaceRoot to merge the non_fruit categories into the root alongside fruit
Here is the code
// Returns a single document: "fruit" holds every fruit record, and each other
// category becomes a key holding one { userId, data } entry per user with at
// most 10 records.
db.collection.aggregate([
  {
    "$facet": {
      // Requirement 1: every record whose category is "fruit".
      "fruit": [
        { $match: { "category": "fruit" } }
      ],
      // Requirement 2: all other categories, limited per user.
      "non_fruit": [
        {
          $match: {
            $expr: {
              $ne: [ "$category", "fruit" ]
            }
          }
        },
        {
          // First group: collect the records of each (category, user) pair.
          $group: {
            _id: { c: "$category", u: "$userId" },
            data: { $push: "$$ROOT" }
          }
        },
        {
          // Second group: one entry per category, holding its per-user lists.
          $group: {
            _id: "$_id.c",
            v: {
              $push: {
                userId: "$_id.u",
                // keep at most 10 records per user, as the requirement states
                data: { "$slice": [ "$data", 10 ] }
              }
            }
          }
        },
        // Shape each entry as { k, v } so $arrayToObject can consume it.
        { $addFields: { "k": "$_id", _id: "$$REMOVE" } }
      ]
    }
  },
  // Turn [ { k: <category>, v: [...] }, ... ] into { <category>: [...], ... }.
  { $addFields: { non_fruit: { "$arrayToObject": "$non_fruit" } } },
  {
    // Lift the per-category keys to the top level next to "fruit".
    "$replaceRoot": {
      "newRoot": {
        "$mergeObjects": [ "$$ROOT", "$non_fruit" ]
      }
    }
  },
  // Drop the helper field now that its contents have been merged into the root.
  { $project: { non_fruit: 0 } }
])
Working Mongo playground

How can I get a single item from the array and display it as an object? and not as an array Mongodb

I have a collection from which I need one specific element of the notes array (e.g. the note with block 2 and curse 5) returned as an object, not as an array.
{
"year":2020,
"grade":4,
"seccion":"A",
"id": 100,
"name": "pedro",
"notes":[{"curse":5,
"block":1,
"score":{ "a1": 5,"a2": 10, "a3": 15}
},{"curse":5,
"block":2,
"score":{ "b1": 10,"b2": 20, "b3": 30}
}
]
}
My query
notas.find({
"$and":[{"grade":1},{"seccion":"A"},{"year":2020}]},
{"projection":{ "grade":1, "seccion":1,"name":1,"id":1,
"notes":{"$elemMatch":{"block":2,"curse":5}},"notes.score":1} })
It works but returns notes like array
{
"_id": "55",
"id": 100,
"grade": 5,
"name": "pedro",
"seccion": "A",
"notes": [
{"score": { "b1": 10,"b2": 20, "b3": 30} }
]
}
But I NEED LIKE THIS: score at the same level as others and if doesn't exist show empty "score":{}
{
"year":2020,
"grade":5,
"seccion":"A",
"id": 100,
"name": "pedro",
"score":{ "b1": 10,"b2": 20, "b3": 30}
}
Demo - https://mongoplayground.net/p/XlJqR2DYW1X
You can use aggregation query
// Returns the matching documents with the score of the (block 2, curse 5) note
// lifted to a top-level "score" field.
db.collection.aggregate([
  {
    // Select the documents, requiring at least one note with block 2 and curse 5.
    $match: {
      grade: 1,
      seccion: "A",
      year: 2020,
      notes: {
        $elemMatch: { block: 2, curse: 5 }
      }
    }
  },
  // Emit one document per element of "notes".
  { $unwind: "$notes" },
  {
    // Keep only the unwound note that is block 2 / curse 5.
    $match: {
      "notes.block": 2,
      "notes.curse": 5
    }
  },
  {
    // Output the requested fields and expose the note's score at the top level.
    $project: {
      grade: 1,
      seccion: 1,
      name: 1,
      id: 1,
      score: "$notes.score"
    }
  }
])
Update
Demo - https://mongoplayground.net/p/mq5Kue3UG42
Use $filter
// Same result as the $unwind version, but without unwinding: filter the "notes"
// array in place and take the score of the first matching note.
db.collection.aggregate([
  {
    $match: {
      "grade": 1,
      "seccion": "A",
      "year": 2020
    }
  },
  {
    $set: {
      // "score" temporarily holds the array of notes matching block 2 / curse 5.
      "score": {
        "$filter": {
          "input": "$notes",
          "as": "note",
          "cond": {
            $and: [
              {
                // the requested note is block 2 (the sample data has no block 3)
                $eq: [ "$$note.block", 2 ]
              },
              {
                $eq: [ "$$note.curse", 5 ]
              }
            ]
          }
        }
      }
    }
  },
  {
    $project: {
      // projection
      "grade": 1,
      "seccion": 1,
      "name": 1,
      "id": 1,
      // "$score.score" is the array of score objects of the matching notes;
      // $first picks the first one.
      "score": {
        "$first": "$score.score"
      }
    }
  }
])
If you want empty object for score when match not found you can do -
Demo - https://mongoplayground.net/p/dumax58kgrc
{
$set: {
score: {
$cond: [
{ $size: "$score" }, // check array length
{ $first: "$score" }, // true - take 1st
{ score: {} } // false - set empty object
]
}
}
},

Indexing not utilized during the MongoDB aggregation query

I am stuck on a MongoDB aggregate query. I am trying to generate a summary report from a collection that contains 110M records. During report generation, I faced the following issues:
1) Even though the collection is indexed, the indexes are not utilized for the search.
2) Once query execution has finished, the memory usage of the DB server does not decrease.
3) The query takes considerable time to return the result.
I'm using MongoDB Atlas v4.2.8.
sample document
{
"_id": {
"$oid": "5eb122f714d0510011e3a184"
},
"from": "Star_friends",
"to": "94713414047",
"accountName": "ZM",
"accountId": "ZM",
"campaignName": "test 1",
"campaignId": "5eb122f1e921c3001922f73c",
"campaignType": "BULK",
"status": {
"$numberInt": "3"
},
"reason": "No Routing",
"channel": "sms",
"messageType": {
"$numberInt": "1"
},
"event": "MT",
"content": "test 132",
"credit": {
"$numberInt": "1"
},
"msgId": "",
"createdDateTime": "2020-05-05T13:55:27.743Z",
"updatedTime": "2020-05-05T13:55:27.745Z",
"uDate": "2020-05-05",
"operator": "mobitel"
}
my query as follows
db.getCollection('report').aggregate([{
"$match": {
"createdDateTime": {
"$gt": "2020-09-14T00:00:01.000Z",
"$lt": "2020-09-15T23:59:99.999Z"
},
"messageType": {
"$in": [1, 2]
},
"channel": {
"$in": ["sms", "viber", "whatsapp"]
},
"accountId": {
"$in": ["ZM", "KEELLS"]
}
}
}, {
"$project": {
"_id": 0,
"channel": 1,
"messageType": 1,
"accountName": 1,
"accountId": 1,
"createdDateTime": 1,
"uDate": 1,
"credit": 1,
"status": 1
}
}, {
"$group": {
"_id": {
"channel": "$channel",
"messageType": "$messageType",
"accountName": "$accountName",
"accountId": "$accountId",
"filteredDate": {
"$substr": ["$createdDateTime", 0, 7]
},
"sortDate": "$uDate"
},
"total": {
"$sum": "$credit"
},
"send": {
"$sum": {
"$cond": [{
"$in": ["$status", [2, 15, 1, 14, 6, 17, 4, 5]]
}, "$credit", 0]
}
},
"delivered": {
"$sum": {
"$cond": [{
"$in": ["$status", [6, 17, 4]]
},
"$credit",
0
]
}
},
"deliveryFailed": {
"$sum": {
"$cond": [{
"$in": ["$status", [12, 5]]
}, "$credit", 0]
}
},
"failed": {
"$sum": {
"$cond": [{
"$in": ["$status", [3]]
}, "$credit", 0]
}
},
"datass": {
"$addToSet": {
"channel": "$channel",
"messageType": "$messageType",
"accountName": "$accountName",
"accountId": "$accountId",
"filteredDate": {
"$substr": ["$createdDateTime", 0, 7]
},
"sortDate": "$uDate"
}
}
}
}, {
"$unwind": "$datass"
}, {
"$project": {
"_id": 0
}
}, {
"$sort": {
"datass.sortDate": -1
}
}])
indexes as follows
accountId_1 / accountId_1_createdDateTime_-1 / campaignId_-1 / channel_1 / createdDateTime_-1 / messageType_1 / msgId_-1 / msgId_-1_status_1
I would appreciate it if someone could help me with this.
Thanks
You gave us little information.
How many documents should average query like such return?
How long does it take to execute the said query?
What I can see here is that your match pipeline is good, because you are trying to filter out documents by fields that are indexed.
But the "performance smell" here is your $sort stage, which sorts on a non-indexed field. Try to do the sorting immediately after $match.
Play with it a little more and try to figure out which stage of the pipeline is the performance bottleneck.
I have resolved my issue by changing my indexes
accountId_1_createdDateTime_-1 /
msgId_-1_status_1 /
accountId_1_messageType_1_channel_1_createdDateTime_1_accountName_1_uDate_1_credit_1_status_1

MongoDB multiple counts, single document, arrays

I have been searching on stackoverflow and cannot find exactly what I am looking for and hope someone can help. I want to submit a single query, get multiple counts back, for a single document, based on array of that document.
My data:
// Sample data. Note the method is insertOne (lower-case "i"): shell method
// names are case-sensitive, so InsertOne is not a function.
db.myCollection.insertOne({
  "_id": "1",
  "age": 30,
  "items": [
    {
      "id": "1",
      "isSuccessful": true,
      "name": null
    }, {
      "id": "2",
      "isSuccessful": true,
      "name": null
    }, {
      "id": "3",
      "isSuccessful": true,
      "name": "Bob"
    }, {
      "id": "4",
      "isSuccessful": null,
      "name": "Todd"
    }
  ]
});
db.myCollection.insertOne({
  "_id": "2",
  "age": 22,
  "items": [
    {
      "id": "6",
      "isSuccessful": true,
      "name": "Jeff"
    }
  ]
});
What I need back is the document and the counts associated to the items array for said document. In this example where the document _id = "1":
{
"_id": "1",
"age": 30,
{
"totalIsSuccessful" : 2,
"totalNotIsSuccessful": 1,
"totalSuccessfulNull": 1,
"totalNameNull": 2
}
}
I have found that I can get this in 4 queries using something like this below, but I would really like it to be one query.
db.test1.aggregate([
{ $match : { _id : "1" } },
{ "$project": {
"total": {
"$size": {
"$filter": {
"input": "$items",
"cond": { "$eq": [ "$$this.isSuccessful", true ] }
}
}
}
}}
])
Thanks in advance.
I am assuming your expected result is invalid since you have an object literal in the middle of another object and also you have totalIsSuccessful for id:1 as 2 where it seems they should be 3. With that said ...
you can get similar output via $unwind and then grouping with $sum and $cond:
db.collection.aggregate([
{ $match: { _id: "1" } },
{ $unwind: "$items" },
{ $group: {
_id: "_id",
age: { $first: "$age" },
totalIsSuccessful: { $sum: { $cond: [{ "$eq": [ "$items.isSuccessful", true ] }, 1, 0 ] } },
totalNotIsSuccessful: { $sum: { $cond: [{ "$ne": [ "$items.isSuccessful", true ] }, 1, 0 ] } },
totalSuccessfulNull: { $sum: { $cond: [{ "$eq": [ "$items.isSuccessful", null ] }, 1, 0 ] } },
totalNameNull: { $sum: { $cond: [ { "$eq": [ "$items.name", null ]}, 1, 0] } } }
}
])
The output would be this:
[
{
"_id": "_id",
"age": 30,
"totalIsSuccessful": 3,
"totalNameNull": 2,
"totalNotIsSuccessful": 1,
"totalSuccessfulNull": 1
}
]
You can see it working here

MongoDB aggregation and sums by common field inside an array

I'm trying to get a list that counts all the victories and battles grouped by player name out of this json that I obtain from an API:
[
{
"createdDate": 1541411260,
"players": [
{
"tag": "tag1234",
"name": "name1",
"battles": 2,
"wins": 1
},
{
"tag": "tag124567",
"name": "name2",
"battles": 1,
"wins": 0
},
{
"tag": "tag1234",
"name": "name3",
"battles": 3,
"wins": 3
}
]
},
{
"createdDate": 1541411460,
"players": [
{
"tag": "tag1234",
"name": "name1",
"battles": 1,
"wins": 1
},
{
"tag": "tag124567",
"name": "name2",
"battles": 1,
"wins": 1
},
{
"tag": "tag1234",
"name": "name3",
"battles": 0,
"wins": 0
},
{
"tag": "tag124567",
"name": "name4",
"battles": 1,
"wins": 0
}
]
},
{
"createdDate": 1541455260,
"players": [
{
"tag": "tag1234",
"name": "name1",
"battles": 0,
"wins": 0
},
{
"tag": "tag124567",
"name": "name2",
"battles": 4,
"wins": 4
},
{
"tag": "tag1234",
"name": "name3",
"battles": 6,
"wins": 6
}
]
}
]
The mongo query I'm using is the following but I can't get the names and battles/wins:
db.getCollection("logs").aggregate([
{ $unwind : '$players' },
{
$group: {
_id: { name: '$players.name' },
numBattles: { $sum: '$players.battles' },
numWins: { $sum: '$players.wins' }
}
},
{
$project: {
name: "$_id.name",
numBattles: '$_id.numBattles',
numWins: '$_id.numWins',
_id: 0
}
]
).pretty();
This gave me 0 results.
Also tried the following but it's returning a full group of players and their stats:
db.getCollection("logs").aggregate([
{
$group: {
_id: { name: '$players.name' },
numBattles: { $sum: '$players.battles' },
numWins: { $sum: '$players.wins' }
}
},
{
$project: {
name: "$_id.name",
numBattles: '$_id.numBattles',
numWins: '$_id.numWins',
_id: 0
}
}
]
).pretty();
The idea is to get something like this:
name1 - 3 battles and 2 wins,
name2 - x battles and y wins,
...
Any ideas?
Thank you.
In your $project stage you're referring to _id, which contains only name. The other fields (numBattles and numWins) are top-level fields of the $group output and should be referenced directly, so you just need to change your $project to:
{
$project: {
name: "$_id.name",
numBattles: "$numBattles",
numWins: "$numWins",
_id: 0
}
}