MongoDB - Sum value in nested array - mongodb

I have an object looks like this
{
"_id" : {
"import_type" : "MANUAL_UPLOAD",
"supplier" : "jabino.de",
"unit_price" : "0"
},
"statuses" : [
{
"status" : "DUPLICATED",
"count" : 14
},
{
"status" : "BLACKLISTED",
"count" : 2
},
{
"status" : "USABLE",
"count" : 2239
},
{
"status" : "INVALID_EMAIL_ADDRESS",
"count" : 1
},
{
"status" : "DUPLICATED",
"count" : 14
},
{
"status" : "BLACKLISTED",
"count" : 2
},
{
"status" : "USABLE",
"count" : 2239
},
{
"status" : "INVALID_EMAIL_ADDRESS",
"count" : 1
}
]
}
How I can sum all the count in the statuses array which has the same status without losing keys-values in _id. E.g. in this case
Duplicated: 28
Blacklisted: 4
Usable: 4478
Invalid email address: 2

You can use below aggregation
db.collection.aggregate([
{ "$unwind": "$statuses" },
{ "$group": {
"_id": {
"_id": "$_id",
"statuses": "$statuses.status"
},
"count": { "$sum": "$statuses.count" }
}},
{ "$group": {
"_id": "$_id._id",
"statuses": {
"$push": {
"status": "$_id.statuses",
"count": "$count"
}
}
}}
])

Related

How to get percentage total of data with group by date in MongoDB

How to get percentage total of data with group by date in MongoDB ?
Link example : https://mongoplayground.net/p/aNND4EPQhcb
I have some collection structure like this
{
"_id" : ObjectId("5ccbb96706d1d47a4b2ced4b"),
"date" : "2019-05-03T10:39:53.108Z",
"id" : 166,
"update_at" : "2019-05-03T10:45:36.208Z",
"type" : "image"
}
{
"_id" : ObjectId("5ccbb96706d1d47a4b2ced4c"),
"date" : "2019-05-03T10:39:53.133Z",
"id" : 166,
"update_at" : "2019-05-03T10:45:36.208Z",
"type" : "image"
}
{
"_id" : ObjectId("5ccbb96706d1d47a4b2ced4d"),
"date" : "2019-05-03T10:39:53.180Z",
"id" : 166,
"update_at" : "2019-05-03T10:45:36.208Z",
"type" : "image"
}
{
"_id" : ObjectId("5ccbb96706d1d47a4b2ced4e"),
"date" : "2019-05-03T10:39:53.218Z",
"id" : 166,
"update_at" : "2019-05-03T10:45:36.208Z",
"type" : "image"
}
And I have query in mongodb to get data of collection, how to get percentage of total data. in bellow example query to get data :
db.name_collection.aggregate(
[
{ "$match": {
"update_at": { "$gte": "2019-11-04T00:00:00.0Z", "$lt": "2019-11-06T00:00:00.0Z"},
"id": { "$in": [166] }
} },
{
"$group" : {
"_id": {
$substr: [ '$update_at', 0, 10 ]
},
"count" : {
"$sum" : 1
}
}
},
{
"$project" : {
"_id" : 0,
"date" : "$_id",
"count" : "$count"
}
},
{
"$sort" : {
"date" : 1
}
}
]
)
and this response :
{
"date" : "2019-11-04",
"count" : 39
},
{
"date" : "2019-11-05",
"count" : 135
}
how to get percentage data total from key count ? example response to this :
{
"date" : "2019-11-04",
"count" : 39,
"percentage" : "22%"
},
{
"date" : "2019-11-05",
"count" : 135,
"percentage" : "78%"
}
You have to group by null to get total count and then use $map to calculate the percentage. $round will be a useful operator in such case. Finally you can $unwind and $replaceRoot to get back the same number of documents:
db.collection.aggregate([
// previous aggregation steps
{
$group: {
_id: null,
total: { $sum: "$count" },
docs: { $push: "$$ROOT" }
}
},
{
$project: {
docs: {
$map: {
input: "$docs",
in: {
date: "$$this.date",
count: "$$this.count",
percentage: { $concat: [ { $toString: { $round: { $multiply: [ { $divide: [ "$$this.count", "$total" ] }, 100 ] } } }, '%' ] }
}
}
}
}
},
{
$unwind: "$docs"
},
{
$replaceRoot: { newRoot: "$docs" }
}
])
Mongo Playground

How to count occurences of values in a nested array?

I've searched but could not find an answer to my problem. I need to count the occurences of the field "nationalCode". I've got a collection with this sample structure in MongoDB:
{
"_id" : ObjectId("5d7519cc6c17d65d4983f048"),
"origin" : "Base1",
"topic" : [
{
"nationalTopic" : {
"nationalCode" : 26
},
"dateTime" : NumberLong(20120927000000)
},
{
"nationalTopic" : {
"nationalCode" : 132
},
"dateTime" : NumberLong(20120927000000)
},
{
"nationalTopic" : {
"nationalCode" : 26
},
"dateTime" : NumberLong(20120927000000)
},
{
"nationalTopic" : {
"nationalCode" : 26
},
"dateTime" : NumberLong(20121005000000)
}
]
}
I've used the following code (I tried many variations of it, but none of them got me the right results):
db.processos.aggregate(
[
{ "$unwind": "$topic" },
{"$match": {"origin": "Base1"}},
{"$group": { "_id": { nationalCode: "$topic.nationalTopic.nationalCode", "count": { "$sum": 1 }} } }
]
)
I'm expecting something like this:
{
"_id" : {
"nationalCode" : 26,
"count" : 3.0
}
}
/* 2 */
{
"_id" : {
"nationalCode" : 132,
"count" : 1.0
}
}
You should extract the count element from the _id.
The following query worked for me.
db.data.aggregate(
[
{ "$unwind": "$topic" },
{"$match": {"origin": "Base1"}},
{"$group": { _id: { "nationalCode": "$topic.nationalTopic.nationalCode" },
"count": {$sum: 1} }
}
]
)
just do it with $project to change your format
do it like this
MongoDB Enterprise >
db.ggg.aggregate(
[
{$unwind:"$topic"},
{"$match": {"origin": "Base1"}},
{"$group": { "_id": { nationalCode: "$topic.nationalTopic.nationalCode"},
"count": { "$sum": 1 } }},
{$project :{"_id.nationalCode":1,"_id.count":"$count"}}
]
)
here it the result !
{ "_id" : { "nationalCode" : 26, "count" : 3 } }
{ "_id" : { "nationalCode" : 132, "count" : 1 } }

$sum arrays with the same ids in mongodb

The two documents of my collection look like this:
First document
{
"_id" : 2055,
"counervalues" : {
"chcounter" : 3
"bscounter" : 10
}
"attributionvalues" :[
{
"id" : 1
"conversionvalue" : 85.0
"conversioncounter" : 6300.0
},
{
"id" : 2
"conversionvalue" : 25.0
"conversioncounter" : 600
}
}
Second document
{
"_id" : 1046,
"counervalues" : {
"chcounter" : 23
"bscounter" : 46
}
"attributionvalues" :[
{
"id" : 1
"conversionvalue" : 15.0
"conversioncounter" : 275.0
},
{
"id" : 2
"conversionvalue" : 65.0
"conversioncounter" : 12000.0
}
}
Now I want to apply the aggregation framework in order to get a new document which has a result as this:
Result
{
"_id" : 3005,
"counervalues" : {
"chcounter" : 26
"bscounter" : 56
}
"attributionvalues" :[
{
"id" : 1
"conversionvalue" : 100.0
"conversioncounter" : 6575.0
},
{
"id" : 2
"conversionvalue" : 90.0
"conversioncounter" : 12600.0
}
}
I started my aggregation like this:
db.conversion.counters.aggregate({
$match:
{
"_id" : {"$gte" : 1046 , "$lte" : 2055}
}
$group:
{
cvchc: {$sum: "$counervalues.chcounter"}
cvbsc: {$sum: "$counervalues.bscounter"}
}
});
but I have trouble to match the attributionvalues according to their ids and add them.
Anyone has an idea?
Run the following aggregation pipeline, should give you the desired results:
db.conversion.aggregate([
{ "$match": { "_id" : { "$gte" : 1046 , "$lte" : 2055 } } },
{ "$unwind": "$attributionvalues" },
{
"$group": {
"_id": "$attributionvalues.id",
"cvchc": { "$sum": "$counervalues.chcounter" },
"cvbsc": { "$sum": "$counervalues.bscounter" },
"avcv": { "$sum": "$attributionvalues.conversionvalue" },
"avcc": { "$sum": "$attributionvalues.conversioncounter" }
}
},
{
"$group": {
"_id": null,
"chcounter": { "$first": "$cvchc" },
"bscounter" : { "$first": "$cvbsc" },
"attributionvalues": {
"$push": {
"id": "$_id",
"conversionvalue": "$avcv" ,
"conversioncounter": "$avcc"
}
}
}
},
{
"$project": {
"counervalues": {
"chcounter": "$chcounter",
"bscounter": "$bscounter"
},
"attributionvalues": 1
}
}
])

Group by reduced field depending on variable in mongodb

I have the following collection for messages:
{
"_id" : ObjectId("56214d5632001bae07a6e6b3"),
"sender_id" : 8,
"receiver_id" : 2,
"content" : "fdgfd",
"state" : 1,
"timestamp" : 1445023062899.0000000000000000
},
{
"_id" : ObjectId("56214d5c32001bae07a6e6b4"),
"sender_id" : 2,
"receiver_id" : 8,
"content" : "fasfa",
"state" : 1,
"timestamp" : 1445023068443.0000000000000000
},
{
"_id" : ObjectId("56214d8032001bae07a6e6b5"),
"sender_id" : 2,
"receiver_id" : 8,
"content" : "dfdsfds",
"state" : 1,
"timestamp" : 1445023104363.0000000000000000
},
{
"_id" : ObjectId("56214d8032001bae07a6e6b6"),
"sender_id" : 2,
"receiver_id" : 8,
"content" : "fdsf",
"state" : 1,
"timestamp" : 1445023104825.0000000000000000
},
{
"_id" : ObjectId("56214d8132001bae07a6e6b7"),
"sender_id" : 2,
"receiver_id" : 8,
"content" : "sfsdfs",
"state" : 1,
"timestamp" : 1445023105436.0000000000000000
},
{
"_id" : ObjectId("56214d8132001bae07a6e6b8"),
"sender_id" : 2,
"receiver_id" : 8,
"content" : "f",
"state" : 1,
"timestamp" : 1445023105963.0000000000000000
},
{
"_id" : ObjectId("56214d8432001bae07a6e6b9"),
"sender_id" : 2,
"receiver_id" : 8,
"content" : "qwqwqwq",
"state" : 1,
"timestamp" : 1445023108202.0000000000000000
},
{
"_id" : ObjectId("56214db032001bae07a6e6ba"),
"sender_id" : 9902,
"receiver_id" : 2,
"content" : "fsafa",
"state" : 1,
"timestamp" : 1445023152297.0000000000000000
}
I'm trying to get all unique users ids that had been messaging with user 2, along with the last content message. So the result should be:
[ { user: 8, lastContent: "qwqwqwq" }, { user: 9902, lastContent: "fsafa" } ]
By now, I have the following code:
db.getCollection('messenger').group({
keyf: function(doc) {
return { user: doc.user };
},
cond: {
$or : [
{ sender_id : 2 },
{ receiver_id : 2 }
]
},
reduce: function( curr, result ) {
result.user = (curr.sender_id == 2 ? curr.receiver_id : curr.sender_id);
result.content = curr.content;
},
initial: { } })
But I only get the last id. The result:
{
"0" : {
"user" : 9902.0000000000000000,
"content" : "fsafa"
} }
Can anyone help me with this?
You need to use the .aggregate() method. You need to reduce the size of documents in the pipeline using the $match operator which filter out all documents where the receiver_id is not equal to 2. After that you need to $sort your document by timestamp in descending order this will help us get the content of last message sent. Now comes the $group stage where you group your documents and use the $addToSet operator which returns array of distinct sender_id and distinct receiver_id and the $last operator to get the last message content. Now to get the user_ids we need union of distinct sender_id and receiver_id which we can get after $projection using the $setUnion operator.
db.messenger.aggregate([
{ "$match": {
"$or": [
{ "sender_id": 2 },
{ "receiver_id": 2 }
]
}},
{ "$sort": { "timestamp": 1 } },
{ "$group": {
"_id": null,
"receiver_id": {
"$addToSet": { "$receiver_id" }
},
"sender_id": {
"$addToSet": { "$sender_id" }
},
"lastContent": { "$last": "$content" }
}},
{ "$project": {
"_id": 0,
"lastContent": 1,
"user_ids": {
"$setUnion": [
"$sender_id",
"$receiver_id"
]
}
}}
])
Which returns:
{ "lastContent" : "fsafa", "user_ids" : [ 9902, 2, 8 ] }
Now if what you want is distinct user alongside their last content message with user 2 then here it is:
db.messenger.aggregate([
{ "$match": {
"$or": [
{ "sender_id": 2 },
{ "receiver_id": 2 }
]
}},
{ "$sort": { "timestamp": 1 } },
{ "$group": {
"_id": {
"sender": "$sender_id",
"receiver": "$receiver_id"
},
"lastContent": {
"$last": "$content"
},
"timestamp": { "$last": "$timestamp" },
"sender": { "$addToSet": "$sender_id" },
"receiver": { "$addToSet": "$receiver_id" }
}},
{ "$project": {
"_id": 0,
"user": {
"$setDifference": [
{ "$setUnion": [ "$sender", "$receiver" ] },
[ 2 ]
]
},
"lastContent": 1,
"timestamp": 1
}},
{ "$unwind": "$user" },
{ "$sort": { "timestamp": 1 } },
{ "$group": {
"_id": "$user",
"lastContent": { "$last": "$lastContent" }
} }
])
Which yields:
{ "_id" : 9902, "lastContent" : "fsafa" }
{ "_id" : 8, "lastContent" : "qwqwqwq" }

MongoDB aggregate group by sum of distinct column

I have analytics collection with the below sample data.
{ "_id" : ObjectId("55f996a4e4b0cc9c0a392594"), "action" : "apiUploadFile", "assetId" : "55f996a4e4b0cc9c0a392593" },
{ "_id" : ObjectId("5603d384e4b0cf75af10be88"), "action" : "agAsset", "assetId" : "55f996a4e4b0cc9c0a392593"},
{ "_id" : ObjectId("5603d395e4b0cf75af10becc"), "action" : "aAD", "assetId" : "55f996a4e4b0cc9c0a392593" },
{ "_id" : ObjectId("5603d395e4b0cf75af10becd"), "action" : "mobCmd", "assetId" : "55f996a4e4b0cc9c0a392593", sessionId : "123"},
{ "_id" : ObjectId("5603d395e4b0cf75af10bece"), "action" : "mobCmd", "assetId" : "55f996a4e4b0cc9c0a392593", sessionId : "1234" },
{ "_id" : ObjectId("5603d395e4b0cf75af10becf"), "action" : "mobCmd", "assetId" : "55f996a4e4b0cc9c0a392593", sessionId : "1234" }
I need find sum of analytics group by 'assetId' and then for each 'action' type. I have come up with the below query
db.analytics.aggregate(
[
{
$match : {
'assetId' : { "$ne": null }
}
},
{$group :{
_id:
{
assId:'$assetId'
},
viewCount:{
$sum:{
$cond: [ { $eq: [ '$action', 'agAsset' ] }, 1, 0 ]
}
},
sessionCount:{
$sum:{
$cond: [ { $eq: [ '$action', 'mobCmd' ] }, 1, 0 ]
}
}
}
}]
)
This works great except for the fact that I can not find the 'sessionCount' using distinct 'sessionId'. For example here is the current output
{ "_id" : { "assId" : "55f996a4e4b0cc9c0a392593" }, "viewCount" : 1, "sessionCount" : 3 }
The expected output is
{ "_id" : { "assId" : "55f996a4e4b0cc9c0a392593" }, "viewCount" : 1, "sessionCount" : 2 }
I need find the sessionCount for action='mobCmd' and has distinct values for sessionId. How can use distinct inside $sum operation of the 'sessionCount' section?
You will need to group your documents on a compound _id field.
db.collection.aggregate([
{ "$match": { "assetId": { "$ne": null }}},
{ "$group": {
"_id": { "assId": "$assetId", "sessionId": "$sessionId" },
"viewCount": {
"$sum": {
"$cond": [
{ "$eq": [ "$action", "agAsset" ] },
1,
0
]
}
},
"sessionCount": {
"$sum": {
"$cond": [
{ "$eq": [ "$action", "mobCmd" ] },
1,
0
]
}
}
}}
])
Which yields:
{ "_id" : { "assId" : "55f996a4e4b0cc9c0a392593", "sessionId" : "1234" }, "viewCount" : 0, "sessionCount" : 2 }
{ "_id" : { "assId" : "55f996a4e4b0cc9c0a392593", "sessionId" : "123" }, "viewCount" : 0, "sessionCount" : 1 }
{ "_id" : { "assId" : "55f996a4e4b0cc9c0a392593" }, "viewCount" : 1, "sessionCount" : 0 }
Or use the $addToSet operator to return an array of unique sessionId and $unwind the array then regroup your documents.
db.collection.aggregate([
{ "$match": { "assetId": { "$ne": null }}},
{ "$group": {
"_id": "$assetId",
"sessionId": { "$addToSet": "$sessionId" },
"viewCount": {
"$sum": {
"$cond": [
{ "$eq": [ "$action", "agAsset" ] },
1,
0
]
}
}
}},
{ "$unwind": "$sessionId" },
{ "$group": {
"_id": "$_id",
"viewCount": { "$first": "$viewCount" },
"sessionCount": { "$sum": 1 }
}}
])
Which returns:
{ "_id" : "55f996a4e4b0cc9c0a392593", "viewCount" : 1, "sessionCount" : 2 }