Group by reduced field depending on variable in mongodb - mongodb

I have the following collection for messages:
{
"_id" : ObjectId("56214d5632001bae07a6e6b3"),
"sender_id" : 8,
"receiver_id" : 2,
"content" : "fdgfd",
"state" : 1,
"timestamp" : 1445023062899.0000000000000000
},
{
"_id" : ObjectId("56214d5c32001bae07a6e6b4"),
"sender_id" : 2,
"receiver_id" : 8,
"content" : "fasfa",
"state" : 1,
"timestamp" : 1445023068443.0000000000000000
},
{
"_id" : ObjectId("56214d8032001bae07a6e6b5"),
"sender_id" : 2,
"receiver_id" : 8,
"content" : "dfdsfds",
"state" : 1,
"timestamp" : 1445023104363.0000000000000000
},
{
"_id" : ObjectId("56214d8032001bae07a6e6b6"),
"sender_id" : 2,
"receiver_id" : 8,
"content" : "fdsf",
"state" : 1,
"timestamp" : 1445023104825.0000000000000000
},
{
"_id" : ObjectId("56214d8132001bae07a6e6b7"),
"sender_id" : 2,
"receiver_id" : 8,
"content" : "sfsdfs",
"state" : 1,
"timestamp" : 1445023105436.0000000000000000
},
{
"_id" : ObjectId("56214d8132001bae07a6e6b8"),
"sender_id" : 2,
"receiver_id" : 8,
"content" : "f",
"state" : 1,
"timestamp" : 1445023105963.0000000000000000
},
{
"_id" : ObjectId("56214d8432001bae07a6e6b9"),
"sender_id" : 2,
"receiver_id" : 8,
"content" : "qwqwqwq",
"state" : 1,
"timestamp" : 1445023108202.0000000000000000
},
{
"_id" : ObjectId("56214db032001bae07a6e6ba"),
"sender_id" : 9902,
"receiver_id" : 2,
"content" : "fsafa",
"state" : 1,
"timestamp" : 1445023152297.0000000000000000
}
I'm trying to get all unique users ids that had been messaging with user 2, along with the last content message. So the result should be:
[ { user: 8, lastContent: "qwqwqwq" }, { user: 9902, lastContent: "fsafa" } ]
By now, I have the following code:
db.getCollection('messenger').group({
keyf: function(doc) {
return { user: doc.user };
},
cond: {
$or : [
{ sender_id : 2 },
{ receiver_id : 2 }
]
},
reduce: function( curr, result ) {
result.user = (curr.sender_id == 2 ? curr.receiver_id : curr.sender_id);
result.content = curr.content;
},
initial: { } })
But I only get the last id. The result:
{
"0" : {
"user" : 9902.0000000000000000,
"content" : "fsafa"
} }
Can anyone help me with this?

You need to use the .aggregate() method. You need to reduce the size of documents in the pipeline using the $match operator which filter out all documents where the receiver_id is not equal to 2. After that you need to $sort your document by timestamp in descending order this will help us get the content of last message sent. Now comes the $group stage where you group your documents and use the $addToSet operator which returns array of distinct sender_id and distinct receiver_id and the $last operator to get the last message content. Now to get the user_ids we need union of distinct sender_id and receiver_id which we can get after $projection using the $setUnion operator.
db.messenger.aggregate([
{ "$match": {
"$or": [
{ "sender_id": 2 },
{ "receiver_id": 2 }
]
}},
{ "$sort": { "timestamp": 1 } },
{ "$group": {
"_id": null,
"receiver_id": {
"$addToSet": { "$receiver_id" }
},
"sender_id": {
"$addToSet": { "$sender_id" }
},
"lastContent": { "$last": "$content" }
}},
{ "$project": {
"_id": 0,
"lastContent": 1,
"user_ids": {
"$setUnion": [
"$sender_id",
"$receiver_id"
]
}
}}
])
Which returns:
{ "lastContent" : "fsafa", "user_ids" : [ 9902, 2, 8 ] }
Now if what you want is distinct user alongside their last content message with user 2 then here it is:
db.messenger.aggregate([
{ "$match": {
"$or": [
{ "sender_id": 2 },
{ "receiver_id": 2 }
]
}},
{ "$sort": { "timestamp": 1 } },
{ "$group": {
"_id": {
"sender": "$sender_id",
"receiver": "$receiver_id"
},
"lastContent": {
"$last": "$content"
},
"timestamp": { "$last": "$timestamp" },
"sender": { "$addToSet": "$sender_id" },
"receiver": { "$addToSet": "$receiver_id" }
}},
{ "$project": {
"_id": 0,
"user": {
"$setDifference": [
{ "$setUnion": [ "$sender", "$receiver" ] },
[ 2 ]
]
},
"lastContent": 1,
"timestamp": 1
}},
{ "$unwind": "$user" },
{ "$sort": { "timestamp": 1 } },
{ "$group": {
"_id": "$user",
"lastContent": { "$last": "$lastContent" }
} }
])
Which yields:
{ "_id" : 9902, "lastContent" : "fsafa" }
{ "_id" : 8, "lastContent" : "qwqwqwq" }

Related

MongoDB - Group by and count value, but treat per record as one

I want to group by and count follow_user.tags.tag_id per record, so no matter how many times the same tag_id show up on the same record, it only counts as 1.
My database structure looks like this:
{
"external_userid" : "EXID1",
"follow_user" : [
{
"userid" : "USERID1",
"tags" : [
{
"tag_id" : "TAG1"
}
]
},
{
"userid" : "USERID2",
"tags" : [
{
"tag_id" : "TAG1"
},
{
"tag_id" : "TAG2"
}
]
}
]
},
{
"external_userid" : "EXID2",
"follow_user" : [
{
"userid" : "USERID1",
"tags" : [
{
"tag_id" : "TAG2"
}
]
}
]
}
Here's my query:
[
{ "$unwind": "$follow_user" }, { "$unwind": "$follow_user.tags" },
{ "$group" : { "_id" : { "follow_user᎐tags᎐tag_id" : "$follow_user.tags.tag_id" }, "COUNT(_id)" : { "$sum" : 1 } } },
{ "$project" : { "total" : "$COUNT(_id)", "tagId" : "$_id.follow_user᎐tags᎐tag_id", "_id" : 0 } }
]
What I expected:
{
"total" : 1,
"tagId" : "TAG1"
},
{
"total" : 2,
"tagId" : "TAG2"
}
What I get:
{
"total" : 2,
"tagId" : "TAG1"
},
{
"total" : 2,
"tagId" : "TAG2"
}
$set - Create a new field follow_user_tags.
1.1. $setUnion - To distinct the value from the Result 1.1.1.
1.1.1. $reduce - Add the value of follow_user.tags.tag_id into array.
$unwind - Deconstruct follow_user_tags array field to multiple documents.
$group - Group by follow_user_tags and perform total count via $sum.
$project - Decorate output document.
db.collection.aggregate([
{
$set: {
follow_user_tags: {
$setUnion: {
"$reduce": {
"input": "$follow_user.tags",
"initialValue": [],
"in": {
"$concatArrays": [
"$$value",
"$$this.tag_id"
]
}
}
}
}
}
},
{
$unwind: "$follow_user_tags"
},
{
$group: {
_id: "$follow_user_tags",
total: {
$sum: 1
}
}
},
{
$project: {
_id: 0,
tagId: "$_id",
total: 1
}
}
])
Sample Mongo Playground

MongoDB aggregate array of objects together by object id and count occurences

I'm trying to figure out what I'm doing wrong, I have collected the following, "Subset of data", "Desired output"
This is how my data objects look
[{
"survey_answers": [
{
"id": "9ca01568e8dbb247", // As they are, this is the key to groupBy
"option_answer": 5, // Represent the index of the choosen option
"type": "OPINION_SCALE" // Opinion scales are 0-10 (meaning elleven options)
},
{
"id": "ba37125ec32b2a99",
"option_answer": 3,
"type": "LABELED_QUESTIONS" // Labeled questions are 0-x (they can change it from survey to survey)
}
],
"survey_id": "test"
},
{
"survey_answers": [
{
"id": "9ca01568e8dbb247",
"option_answer": 0,
"type": "OPINION_SCALE"
},
{
"id": "ba37125ec32b2a99",
"option_answer": 3,
"type": "LABELED_QUESTIONS"
}
],
"survey_id": "test"
}]
My desired output is:
[
{
id: '9ca01568e8dbb247'
results: [
{ _id: 5, count: 1 },
{ _id: 0, count: 1 }
]
},
{
id: 'ba37125ec32b2a99'
results: [
{ _id: 3, count: 2 }
]
}
]
Active query
Model.aggregate([
{
$match: {
'survey_id': survey_id
}
},
{
$unwind: "$survey_answers"
},
{
$group: {
_id: "$survey_answers.option_answer",
count: {
$sum: 1
}
}
}
])
Current output
[
{
"_id": 0,
"count": 1
},
{
"_id": 3,
"count": 2
},
{
"_id": 5,
"count": 1
}
]
I added your records to my db. Post that I tried your commands one by one.
$unwind results you similar to -
> db.survey.aggregate({$unwind: "$survey_answers"})
{ "_id" : ObjectId("5c3859e459875873b5e6ee3c"), "survey_answers" : { "id" : "9ca01568e8dbb247", "option_answer" : 5, "type" : "OPINION_SCALE" }, "survey_id" : "test" }
{ "_id" : ObjectId("5c3859e459875873b5e6ee3c"), "survey_answers" : { "id" : "ba37125ec32b2a99", "option_answer" : 3, "type" : "LABELED_QUESTIONS" }, "survey_id" : "test" }
{ "_id" : ObjectId("5c3859e459875873b5e6ee3d"), "survey_answers" : { "id" : "9ca01568e8dbb247", "option_answer" : 0, "type" : "OPINION_SCALE" }, "survey_id" : "test" }
{ "_id" : ObjectId("5c3859e459875873b5e6ee3d"), "survey_answers" : { "id" : "ba37125ec32b2a99", "option_answer" : 3, "type" : "LABELED_QUESTIONS" }, "survey_id" : "test" }
I am not adding code for match since that is okay in your query as well
The grouping would be -
> db.survey.aggregate({$unwind: "$survey_answers"},{$group: { _id: { 'optionAnswer': "$survey_answers.option_answer", 'id':"$survey_answers.id"}, count: { $sum: 1}}})
{ "_id" : { "optionAnswer" : 0, "id" : "9ca01568e8dbb247" }, "count" : 1 }
{ "_id" : { "optionAnswer" : 3, "id" : "ba37125ec32b2a99" }, "count" : 2 }
{ "_id" : { "optionAnswer" : 5, "id" : "9ca01568e8dbb247" }, "count" : 1 }
You can group on $survey_answers.id to bring it into projection.
The projection is what you're missing in your query -
> db.survey.aggregate({$unwind: "$survey_answers"},{$group: { _id: { 'optionAnswer': "$survey_answers.option_answer", 'id':'$survey_answers.id'}, count: { $sum: 1}}}, {$project : {answer: '$_id.optionAnswer', id: '$_id.id', count: '$count', _id:0}})
{ "answer" : 0, "id" : "9ca01568e8dbb247", "count" : 1 }
{ "answer" : 3, "id" : "ba37125ec32b2a99", "count" : 2 }
{ "answer" : 5, "id" : "9ca01568e8dbb247", "count" : 1 }
Further you can add a group on id and add results to a set. And your final query would be -
db.survey.aggregate(
{$unwind: "$survey_answers"},
{$group: {
_id: { 'optionAnswer': "$survey_answers.option_answer", 'id':'$survey_answers.id'},
count: { $sum: 1}
}},
{$project : {
answer: '$_id.optionAnswer',
id: '$_id.id',
count: '$count',
_id:0
}},
{$group: {
_id:{id:"$id"},
results: { $addToSet: {answer: "$answer", count: '$count'} }
}},
{$project : {
id: '$_id.id',
answer: '$results',
_id:0
}})
Hope this helps.

MongoDB - Sum value in nested array

I have an object looks like this
{
"_id" : {
"import_type" : "MANUAL_UPLOAD",
"supplier" : "jabino.de",
"unit_price" : "0"
},
"statuses" : [
{
"status" : "DUPLICATED",
"count" : 14
},
{
"status" : "BLACKLISTED",
"count" : 2
},
{
"status" : "USABLE",
"count" : 2239
},
{
"status" : "INVALID_EMAIL_ADDRESS",
"count" : 1
},
{
"status" : "DUPLICATED",
"count" : 14
},
{
"status" : "BLACKLISTED",
"count" : 2
},
{
"status" : "USABLE",
"count" : 2239
},
{
"status" : "INVALID_EMAIL_ADDRESS",
"count" : 1
}
]
}
How I can sum all the count in the statuses array which has the same status without losing keys-values in _id. E.g. in this case
Duplicated: 28
Blacklisted: 4
Usable: 4478
Invalid email address: 2
You can use below aggregation
db.collection.aggregate([
{ "$unwind": "$statuses" },
{ "$group": {
"_id": {
"_id": "$_id",
"statuses": "$statuses.status"
},
"count": { "$sum": "$statuses.count" }
}},
{ "$group": {
"_id": "$_id._id",
"statuses": {
"$push": {
"status": "$_id.statuses",
"count": "$count"
}
}
}}
])

MongoDB aggregate group by sum of distinct column

I have analytics collection with the below sample data.
{ "_id" : ObjectId("55f996a4e4b0cc9c0a392594"), "action" : "apiUploadFile", "assetId" : "55f996a4e4b0cc9c0a392593" },
{ "_id" : ObjectId("5603d384e4b0cf75af10be88"), "action" : "agAsset", "assetId" : "55f996a4e4b0cc9c0a392593"},
{ "_id" : ObjectId("5603d395e4b0cf75af10becc"), "action" : "aAD", "assetId" : "55f996a4e4b0cc9c0a392593" },
{ "_id" : ObjectId("5603d395e4b0cf75af10becd"), "action" : "mobCmd", "assetId" : "55f996a4e4b0cc9c0a392593", sessionId : "123"},
{ "_id" : ObjectId("5603d395e4b0cf75af10bece"), "action" : "mobCmd", "assetId" : "55f996a4e4b0cc9c0a392593", sessionId : "1234" },
{ "_id" : ObjectId("5603d395e4b0cf75af10becf"), "action" : "mobCmd", "assetId" : "55f996a4e4b0cc9c0a392593", sessionId : "1234" }
I need find sum of analytics group by 'assetId' and then for each 'action' type. I have come up with the below query
db.analytics.aggregate(
[
{
$match : {
'assetId' : { "$ne": null }
}
},
{$group :{
_id:
{
assId:'$assetId'
},
viewCount:{
$sum:{
$cond: [ { $eq: [ '$action', 'agAsset' ] }, 1, 0 ]
}
},
sessionCount:{
$sum:{
$cond: [ { $eq: [ '$action', 'mobCmd' ] }, 1, 0 ]
}
}
}
}]
)
This works great except for the fact that I can not find the 'sessionCount' using distinct 'sessionId'. For example here is the current output
{ "_id" : { "assId" : "55f996a4e4b0cc9c0a392593" }, "viewCount" : 1, "sessionCount" : 3 }
The expected output is
{ "_id" : { "assId" : "55f996a4e4b0cc9c0a392593" }, "viewCount" : 1, "sessionCount" : 2 }
I need find the sessionCount for action='mobCmd' and has distinct values for sessionId. How can use distinct inside $sum operation of the 'sessionCount' section?
You will need to group your documents on a compound _id field.
db.collection.aggregate([
{ "$match": { "assetId": { "$ne": null }}},
{ "$group": {
"_id": { "assId": "$assetId", "sessionId": "$sessionId" },
"viewCount": {
"$sum": {
"$cond": [
{ "$eq": [ "$action", "agAsset" ] },
1,
0
]
}
},
"sessionCount": {
"$sum": {
"$cond": [
{ "$eq": [ "$action", "mobCmd" ] },
1,
0
]
}
}
}}
])
Which yields:
{ "_id" : { "assId" : "55f996a4e4b0cc9c0a392593", "sessionId" : "1234" }, "viewCount" : 0, "sessionCount" : 2 }
{ "_id" : { "assId" : "55f996a4e4b0cc9c0a392593", "sessionId" : "123" }, "viewCount" : 0, "sessionCount" : 1 }
{ "_id" : { "assId" : "55f996a4e4b0cc9c0a392593" }, "viewCount" : 1, "sessionCount" : 0 }
Or use the $addToSet operator to return an array of unique sessionId and $unwind the array then regroup your documents.
db.collection.aggregate([
{ "$match": { "assetId": { "$ne": null }}},
{ "$group": {
"_id": "$assetId",
"sessionId": { "$addToSet": "$sessionId" },
"viewCount": {
"$sum": {
"$cond": [
{ "$eq": [ "$action", "agAsset" ] },
1,
0
]
}
}
}},
{ "$unwind": "$sessionId" },
{ "$group": {
"_id": "$_id",
"viewCount": { "$first": "$viewCount" },
"sessionCount": { "$sum": 1 }
}}
])
Which returns:
{ "_id" : "55f996a4e4b0cc9c0a392593", "viewCount" : 1, "sessionCount" : 2 }

MongoDB aggregate group array to key : sum value

Hello I am new to mongodb and trying to convert objects with different types (int) into key value pairs.
I have collection like this:
{
"_id" : ObjectId("5372a9fc0079285635db14d8"),
"type" : 1,
"stat" : "foobar"
},
{
"_id" : ObjectId("5372aa000079285635db14d9"),
"type" : 1,
"stat" : "foobar"
},
{
"_id" : ObjectId("5372aa010079285635db14da"),
"type" : 2,
"stat" : "foobar"
},{
"_id" : ObjectId("5372aa030079285635db14db"),
"type" : 3,
"stat" : "foobar"
}
I want to get result like this:
{
"type1" : 2, "type2" : 1, "type3" : 1,
"stat" : "foobar"
}
Currently trying aggregation group and then push type values to array
db.types.aggregate(
{$group : {
_id : "$stat",
types : {$push : "$type"}
}}
)
But don't know how to sum different types and to convert it into key values
/* 0 */
{
"result" : [
{
"_id" : "foobar",
"types" : [
1,
2,
2,
3
]
}
],
"ok" : 1
}
For your actual form, and therefore presuming that you actually know the possible values for "type" then you can do this with two $group stages and some use of the $cond operator:
db.types.aggregate([
{ "$group": {
"_id": {
"stat": "$stat",
"type": "$type"
},
"count": { "$sum": 1 }
}},
{ "$group": {
"_id": "$_id.stat",
"type1": { "$sum": { "$cond": [
{ "$eq": [ "$_id.type", 1 ] },
"$count",
0
]}},
"type2": { "$sum": { "$cond": [
{ "$eq": [ "$_id.type", 2 ] },
"$count",
0
]}},
"type3": { "$sum": { "$cond": [
{ "$eq": [ "$_id.type", 3 ] },
"$count",
0
]}}
}}
])
Which gives exactly:
{ "_id" : "foobar", "type1" : 2, "type2" : 1, "type3" : 1 }
I actually prefer the more dynamic form with two $group stages though:
db.types.aggregate([
{ "$group": {
"_id": {
"stat": "$stat",
"type": "$type"
},
"count": { "$sum": 1 }
}},
{ "$group": {
"_id": "$_id.stat",
"types": { "$push": {
"type": "$_id.type",
"count": "$count"
}}
}}
])
Not the same output but functional and flexible to the values:
{
"_id" : "foobar",
"types" : [
{
"type" : 3,
"count" : 1
},
{
"type" : 2,
"count" : 1
},
{
"type" : 1,
"count" : 2
}
]
}
Otherwise if you need the same output format but need the flexible fields then you can always use mapReduce, but it's not exactly the same output.
db.types.mapReduce(
function () {
var obj = { };
var key = "type" + this.type;
obj[key] = 1;
emit( this.stat, obj );
},
function (key,values) {
var obj = {};
values.forEach(function(value) {
for ( var k in value ) {
if ( !obj.hasOwnProperty(k) )
obj[k] = 0;
obj[k]++;
}
});
return obj;
},
{ "out": { "inline": 1 } }
)
And in typical mapReduce style:
"results" : [
{
"_id" : "foobar",
"value" : {
"type1" : 2,
"type2" : 1,
"type3" : 1
}
}
],
But those are your options
Is this close enough for you?
{ "_id" : "foobar", "types" : [ { "type" : "type3", "total" : 1 }, { "type" : "type2", "total" : 1 }, { "type" : "type1", "total" : 2 } ] }
The types are in an array, but it seems to get you the data you are looking for. Code is:
db.types.aggregate(
[{$group : {
_id : "$stat",
types : {$push : "$type"}
}},
{$unwind:"$types"},
{$group: {
_id:{stat:"$_id",
types: {$substr: ["$types", 0, 1]}},
total:{$sum:1}}},
{$project: {
_id:0,
stat:"$_id.stat",
type: { $concat: [ "type", "$_id.types" ] },
total:"$total" }},
{$group: {
_id: "$stat",
types: { $push: { type: "$type", total: "$total" } } }}
]
)