mongodb $group for nested array element - mongodb

I have following mongodb collection (questionanswers) for a given question
{
"_id" : { "$oid" : "59cda8b730a0131cfafc9101" },
"questionId" : { "$oid" : "59cb7b85c854036fec173267" },
"userQuestionAnswerDetails" : [ { "srno" : 1, "correct_yn" : false}, {
"srno" : 2, "correct_yn" : false} ]
}
{
"_id" : { "$oid" : "59cdb1b930a0131cfafc9102" },
"questionId" : { "$oid" : "59cb7b85c854036fec173267" },
"userQuestionAnswerDetails" : [ { "srno" : 2, "correct_yn" : false} ]
}
{
"_id" : { "$oid" : "59cdbb1b30a0131cfafc9105" },
"questionId" : { "$oid" : "59cb7b85c854036fec173267" },
"userQuestionAnswerDetails" : [ { "srno" : 2, "correct_yn" : false} ]
}
{
"_id" : { "$oid" : "59cdd26a30a0131cfafc9107" },
"questionId" : { "$oid" : "59cb7b85c854036fec173267" },
"userQuestionAnswerDetails" : [ { "srno" : 1, "correct_yn" : false} ]
}
I need cnt per sr no which is inside nested array element.
Expected is:
Srno ===> cnt
1 ===> 2
2 ===> 3
I am trying with below query
db.questionanswers.aggregate([
{
$match:{
$and: [
{questionId: ObjectId("59cb7b85c854036fec173267")}
]
}
},
{
$group : {_id : {questionId:"$questionId", userQuestionAnswerDetails: "$userQuestionAnswerDetails.srno"}, num_tutorial : {$sum : 1}}
}
])
Above query is giving me something similar to below:
{"_id": {"questionId":"ObjectId(59cb7b85c854036fec173267)", "userQuestionAnswerDetails":[1,2]},"num_tutorial":1}
{"_id":{"questionId":"ObjectId(59cb7b85c854036fec173267)", "userQuestionAnswerDetails":[1]},"num_tutorial":1}
{"_id":{"questionId":"ObjectId(59cb7b85c854036fec173267)", "userQuestionAnswerDetails":[2]},"num_tutorial":2}
which is equivalent to
Srno ===> Cnt
1,2 ===> 1
1 ===> 1
2 ===> 2
How can I modify $group so as to achieve the expected result? Thanks in advance.

You can make it by adding $unwind stage.
db.questionanswers.aggregate([
{
$match:{
$and: [
{questionId: ObjectId("59cb7b85c854036fec173267")}
]
}
}
,{
$unwind: "$userQuestionAnswerDetails"
}
,{
$group : {_id : {questionId:"$questionId", userQuestionAnswerDetails: "$userQuestionAnswerDetails.srno"}, num_tutorial : {$sum : 1}}
}
])
Result:
{ "_id" : { "questionId" : ObjectId("59cb7b85c854036fec173267"), "userQuestionAnswerDetails" : 2 }, "num_tutorial" : 3 }
{ "_id" : { "questionId" : ObjectId("59cb7b85c854036fec173267"), "userQuestionAnswerDetails" : 1 }, "num_tutorial" : 2 }

Related

Facing a problem with the lookup in the second (student) table that matches all incoming output records mongodb aggregation

I'm facing a problem with the lookup in the second (student) table that matches all incoming output records of the first(test) table. I have two collections "tests" and "students". "Test" collection contains all school tests and the "student" table contains student's attended tests. Student table contains "pastTest"(test attended in past with status "pass" or "fail")array. I want to retrieve student who passed all incoming tests (we retrieve from the tests table)
test table: _id (primary ket)
student.pastTests.testId (need to match with test._id)
Test Document:
{
"_id" : ObjectId("5c9b5c1005729b2bf23f3290"),
"testDate" : {
"term" : 1,
"week" : 7
},
"retestDate" : {
"term" : 1,
"week" : 10
},
"testOrder" : "1.1",
"testDateScheduled" : true,
"retestDateScheduled" : true
}
Student Document:
{
"_id" : ObjectId("5c92dd994e8e6b2c1647d0d0"),
"completedYears" : [],
"firstName" : "Andrew",
"lastName" : "Jonhson",
"teacherId" : ObjectId("5bf36b1076696374e65feb4f"),
"yearGroup" : "0",
"schoolId" : 40001,
"currentTest" : ObjectId("5c9b5c1005729b2bf23f3290"),
"pastTests" : [
{
"_id" : ObjectId("5d3570645045863d373f6db1"),
"testId" : ObjectId("5c9b5c1005729b2bf23f3290"),
"status" : "pass"
},
{
"_id" : ObjectId("5d425af07708f5636c3bec1c"),
"testId" : ObjectId("5c9b5fc460e39c2c58e44109"),
"status" : "pass"
},
{
"_id" : ObjectId("5d5e54a875fab079f4d03570"),
"testId" : ObjectId("5c9b6492bb581c2ceb553fef"),
"status" : "fail"
},
],
"createdAt" : ISODate("2019-03-21T00:40:57.401Z"),
"updatedAt" : ISODate("2020-09-24T19:55:38.291Z"),
"__v" : 0,
"holdTests" : [],
"completedTests" : [],
"className" : "dd",
}
Query:
db.getCollection('tests').aggregate([
{
$match: {
yearGroup: '-1',
$or : [
{
$and: [
{'retestDateScheduled': true},
{ 'retestDate.term': { $lt: 4 } },
]
},
{
$and: [
{'testDateScheduled': true},
{ 'testDate.term': { $lt: 4 } },
]
}
]
}
},
{
$lookup: {
from: 'students',
let: {testId: '$_id', schoolId: 49014, yearGroup: '-1'},
pipeline: [
]
}
}
])
Note: Initial match query returns all tests of the term-1, now I have to retrieve students who passed in all tests of the term-1.
Lookup stage is pending - facing problem with lookup in second (student) table who match all incoming output records of first(test) collection
Thanks in advance !!
Try this:
db.tests.aggregate([
{
$match: {
// Your match condition
}
},
{
$group: {
_id: null,
term_1_testIds: { $push: "$_id" },
test_count: { $sum: 1 }
}
},
{
$lookup: {
from: "students",
let: { term_1_testIds: '$term_1_testIds', schoolId: 40001, totalTestCount: "$test_count" },
pipeline: [
{
$match: {
$expr: { $eq: ["$schoolId", "$$schoolId"] }
}
},
{ $unwind: "$pastTests" },
{
$match: {
"pastTests.status": "pass",
$expr: { $in: ["$pastTests.testId", "$$term_1_testIds"] }
}
},
{
$group: {
_id: "$_id",
firstName: { $first: "$firstName" },
yearGroup: { $first: "$yearGroup" },
schoolId: { $first: "$schoolId" },
currentTest: { $first: "$currentTest" },
passedTestCount: { $sum: 1 },
pastTests: { $push: "$pastTests" }
}
},
{
$match: {
$expr: { $eq: ["$passedTestCount", "$$totalTestCount"] }
}
}
],
as: "students"
}
}
]);
Output:
{
"_id" : null,
"term_1_testIds" : [
ObjectId("5c9b5c1005729b2bf23f3290"),
ObjectId("5c9b5fc460e39c2c58e44109"),
ObjectId("5c9b6492bb581c2ceb553fef")
],
"test_count" : 3,
"students" : [
{
"_id" : ObjectId("5c92dd994e8e6b2c1647d0d1"),
"firstName" : "Dheemanth",
"yearGroup" : "0",
"schoolId" : 40001,
"currentTest" : ObjectId("5c9b5c1005729b2bf23f3290"),
"passedTestCount" : 3,
"pastTests" : [
{
"_id" : ObjectId("5d3570645045863d373f6db1"),
"testId" : ObjectId("5c9b5c1005729b2bf23f3290"),
"status" : "pass"
},
{
"_id" : ObjectId("5d425af07708f5636c3bec1c"),
"testId" : ObjectId("5c9b5fc460e39c2c58e44109"),
"status" : "pass"
},
{
"_id" : ObjectId("5d5e54a875fab079f4d03570"),
"testId" : ObjectId("5c9b6492bb581c2ceb553fef"),
"status" : "pass"
}
]
}
]
}
This how my tests collection looks like
/* 1 createdAt:3/27/2019, 5:24:58 PM*/
{
"_id" : ObjectId("5c9b6492bb581c2ceb553fef"),
"name" : "Test 3"
},
/* 2 createdAt:3/27/2019, 5:04:28 PM*/
{
"_id" : ObjectId("5c9b5fc460e39c2c58e44109"),
"name" : "Test 2"
},
/* 3 createdAt:3/27/2019, 4:48:40 PM*/
{
"_id" : ObjectId("5c9b5c1005729b2bf23f3290"),
"name" : "Test 1"
}
This is how my students collection looks like:
/* 1 createdAt:3/21/2019, 6:10:57 AM*/
{
"_id" : ObjectId("5c92dd994e8e6b2c1647d0d1"),
"firstName" : "Dheemanth",
"yearGroup" : "0",
"schoolId" : 40001,
"currentTest" : ObjectId("5c9b5c1005729b2bf23f3290"),
"pastTests" : [
{
"_id" : ObjectId("5d3570645045863d373f6db1"),
"testId" : ObjectId("5c9b5c1005729b2bf23f3290"),
"status" : "pass"
},
{
"_id" : ObjectId("5d425af07708f5636c3bec1c"),
"testId" : ObjectId("5c9b5fc460e39c2c58e44109"),
"status" : "pass"
},
{
"_id" : ObjectId("5d5e54a875fab079f4d03570"),
"testId" : ObjectId("5c9b6492bb581c2ceb553fef"),
"status" : "pass"
}
]
},
/* 2 createdAt:3/21/2019, 6:10:57 AM*/
{
"_id" : ObjectId("5c92dd994e8e6b2c1647d0d0"),
"firstName" : "Andrew",
"yearGroup" : "0",
"schoolId" : 40001,
"currentTest" : ObjectId("5c9b5c1005729b2bf23f3290"),
"pastTests" : [
{
"_id" : ObjectId("5d3570645045863d373f6db1"),
"testId" : ObjectId("5c9b5c1005729b2bf23f3290"),
"status" : "pass"
},
{
"_id" : ObjectId("5d425af07708f5636c3bec1c"),
"testId" : ObjectId("5c9b5fc460e39c2c58e44109"),
"status" : "pass"
},
{
"_id" : ObjectId("5d5e54a875fab079f4d03570"),
"testId" : ObjectId("5c9b6492bb581c2ceb553fef"),
"status" : "fail"
}
]
}
Also:
In your first $match stage, $and operator is redundant inside $or array it should be like this:
{
$match: {
yearGroup: '-1',
$or: [
{
'retestDateScheduled': true,
'retestDate.term': { $lt: 4 }
},
{
'testDateScheduled': true,
'testDate.term': { $lt: 4 }
}
]
}
}

mongodb aggregate sum item as nested data

Here is my some sample data in collection sale
[
{group:2, item:a, qty:3 },
{group:2, item:b, qty:3 },
{group:2, item:b, qty:2 },
{group:1, item:a, qty:3 },
{group:1, item:a, qty:5 },
{group:1, item:b, qty:5 }
]
and I want to query data like below and sort the popular group to the top
[
{ group:1, items:[{name:'a',total_qty:8},{name:'b',total_qty:5} ],total_qty:13 },
{ group:2, items:[{name:'a',total_qty:3},{name:'b',total_qty:5} ],total_qty:8 },
]
Actually we can loop in server script( php, nodejs ...) but the problem is pagination. I cannot use skip to get the right result.
The following query can get us the expected output:
db.collection.aggregate([
{
$group:{
"_id":{
"group":"$group",
"item":"$item"
},
"group":{
$first:"$group"
},
"item":{
$first:"$item"
},
"total_qty":{
$sum:"$qty"
}
}
},
{
$group:{
"_id":"$group",
"group":{
$first:"$group"
},
"items":{
$push:{
"name":"$item",
"total_qty":"$total_qty"
}
},
"total_qty":{
$sum:"$total_qty"
}
}
},
{
$project:{
"_id":0
}
}
]).pretty()
Data set:
{
"_id" : ObjectId("5d84a37febcbd560107c54a7"),
"group" : 2,
"item" : "a",
"qty" : 3
}
{
"_id" : ObjectId("5d84a37febcbd560107c54a8"),
"group" : 2,
"item" : "b",
"qty" : 3
}
{
"_id" : ObjectId("5d84a37febcbd560107c54a9"),
"group" : 2,
"item" : "b",
"qty" : 2
}
{
"_id" : ObjectId("5d84a37febcbd560107c54aa"),
"group" : 1,
"item" : "a",
"qty" : 3
}
{
"_id" : ObjectId("5d84a37febcbd560107c54ab"),
"group" : 1,
"item" : "a",
"qty" : 5
}
{
"_id" : ObjectId("5d84a37febcbd560107c54ac"),
"group" : 1,
"item" : "b",
"qty" : 5
}
Output:
{
"group" : 2,
"items" : [
{
"name" : "b",
"total_qty" : 5
},
{
"name" : "a",
"total_qty" : 3
}
],
"total_qty" : 8
}
{
"group" : 1,
"items" : [
{
"name" : "b",
"total_qty" : 5
},
{
"name" : "a",
"total_qty" : 8
}
],
"total_qty" : 13
}
You need to use $group aggregation with $sum and $push accumulator
db.collection.aggregate([
{ "$group": {
"_id": "$group",
"items": { "$push": "$$ROOT" },
"total_qty": { "$sum": "$qty" }
}},
{ "$sort": { "total_qty": -1 }}
])

mongodb aggregation $group and then $push a object

this is my data :
> db.bookmarks.find({"userId" : "56b9b74bf976ab70ff6b9999"}).pretty()
{
"_id" : ObjectId("56c2210fee4a33579f4202dd"),
"userId" : "56b9b74bf976ab70ff6b9999",
"items" : [
{
"itemId" : "28",
"timestamp" : "2016-02-12T18:07:28Z"
},
{
"itemId" : "29",
"timestamp" : "2016-02-12T18:07:29Z"
},
{
"itemId" : "30",
"timestamp" : "2016-02-12T18:07:30Z"
},
{
"itemId" : "31",
"timestamp" : "2016-02-12T18:07:31Z"
},
{
"itemId" : "32",
"timestamp" : "2016-02-12T18:07:32Z"
},
{
"itemId" : "33",
"timestamp" : "2016-02-12T18:07:33Z"
},
{
"itemId" : "34",
"timestamp" : "2016-02-12T18:07:34Z"
}
]
}
I want to have something like (actually i hope the _id can become userId too) :
{
"_id" : "56b9b74bf976ab70ff6b9999",
"items" : [
{ "itemId": "32", "timestamp": "2016-02-12T18:07:32Z" },
{ "itemId": "31", "timestamp": "2016-02-12T18:07:31Z" },
{ "itemId": "30", "timestamp": "2016-02-12T18:07:30Z" }
]
}
What I have now :
> db.bookmarks.aggregate(
... { $match: { "userId" : "56b9b74bf976ab70ff6b9999" } },
... { $unwind: '$items' },
... { $sort: { 'items.timestamp': -1} },
... { $skip: 2 },
... { $limit: 3},
... { $group: { '_id': '$userId' , items: { $push: '$items.itemId' } } }
... ).pretty()
{ "_id" : "56b9b74bf976ab70ff6b9999", "items" : [ "32", "31", "30" ] }
i tried to read the document in mongo and find out i can $push, but somehow i cannot find a way to push such object, which is not defined anywhere in the whole object. I want to have the timestamp also.. but i don't know how should i modified the $group (or others??) to do so. thanks for helping!
This code, which I tested in the MongoDB 3.2.1 shell, should give you the output format that you want:
> db.bookmarks.aggregate(
{ "$match" : { "userId" : "Ursula" } },
{ "$unwind" : "$items" },
{ "$sort" : { "items.timestamp" : -1 } },
{ "$skip" : 2 },
{ "$limit" : 3 },
{ "$group" : { "_id" : "$userId", items: { "$push" : { "myPlace" : "$items.itemId", "myStamp" : "$items.timestamp" } } } } ).pretty()
Running the above will produce this output:
{
"_id" : "Ursula",
"items" : [
{
"myPlace" : "52",
"myStamp" : ISODate("2016-02-13T18:07:32Z")
},
{
"myPlace" : "51",
"myStamp" : ISODate("2016-02-13T18:07:31Z")
},
{
"myPlace" : "50",
"myStamp" : ISODate("2016-02-13T18:07:30Z")
}
]
}
In MongoDB version 3.2.x, you can also use the $out operator in the very last stage of the aggregation pipeline, and have the output of the aggregation query written to a collection. Here is the code I used:
> db.bookmarks.aggregate(
{ "$match" : { "userId" : "Ursula" } },
{ "$unwind" : "$items" },
{ "$sort" : { "items.timestamp" : -1 } },
{ "$skip" : 2 },
{ "$limit" : 3 },
{ "$group" : { "_id" : "$userId", items: { "$push" : { "myPlace" : "$items.itemId", "myStamp" : "$items.timestamp" } } } },
{ "$out" : "ursula" } )
This gives me a collection named "ursula":
> show collections
ursula
and I can query that collection:
> db.ursula.find().pretty()
{
"_id" : "Ursula",
"items" : [
{
"myPlace" : "52",
"myStamp" : ISODate("2016-02-13T18:07:32Z")
},
{
"myPlace" : "51",
"myStamp" : ISODate("2016-02-13T18:07:31Z")
},
{
"myPlace" : "50",
"myStamp" : ISODate("2016-02-13T18:07:30Z")
}
]
}
>
Last of all, this is the input document I used in the aggregation query. You can compare this document to how I coded the aggregation query to see how I built the new items array.
> db.bookmarks.find( { "userId" : "Ursula" } ).pretty()
{
"_id" : ObjectId("56c240ed55f2f6004dc3b25c"),
"userId" : "Ursula",
"items" : [
{
"itemId" : "48",
"timestamp" : ISODate("2016-02-13T18:07:28Z")
},
{
"itemId" : "49",
"timestamp" : ISODate("2016-02-13T18:07:29Z")
},
{
"itemId" : "50",
"timestamp" : ISODate("2016-02-13T18:07:30Z")
},
{
"itemId" : "51",
"timestamp" : ISODate("2016-02-13T18:07:31Z")
},
{
"itemId" : "52",
"timestamp" : ISODate("2016-02-13T18:07:32Z")
},
{
"itemId" : "53",
"timestamp" : ISODate("2016-02-13T18:07:33Z")
},
{
"itemId" : "54",
"timestamp" : ISODate("2016-02-13T18:07:34Z")
}
]
}

Use field value as key

I am doing this query
db.analytics.aggregate([
{
$match: {"event":"USER_SENTIMENT"}
},
{ $group: {
_id: {brand:"$data.brandId",sentiment:"$data.sentiment"},
count: {$sum : 1}
}
},
{ $group: {
_id: "$_id.brand",
sentiments: {$addToSet : {sentiment:"$_id.sentiment", count:"$count"}}
}
}
])
Which generates that :
{
"result" : [
{
"_id" : 57,
"sentiments" : [
{
"sentiment" : "Meh",
"count" : 4
}
]
},
{
"_id" : 376,
"sentiments" : [
{
"sentiment" : "Meh",
"count" : 1
},
{
"sentiment" : "Happy",
"count" : 1
},
{
"sentiment" : "Confused",
"count" : 1
}
]
}
],
"ok" : 1
}
But What I want is that :
[
{
"_id" : 57,
"Meh" : 4
},
{
"_id" : 376,
"Meh" : 1,
"Happy" : 1,
"Confused" : 1
}
]
Any idea on how to transform that? The blocking point for me is to transform a value into a key.

MongoDB aggregation framework $subtract

I'm want use mongodb to achieve simple query like mysql "select a-b from table", but aggregation framework query result is not right.
data:
{ "_id" : ObjectId("511223348a88785127a0d13f"), "a" : 1, "b" : 1, "name" : "xxxxx0" }
{ "_id" : ObjectId("511223348a88785127a0d13f"), "a" : 2, "b" : 2, "name" : "xxxxx1" }
mongodb cmd:
db.site.aggregate([
{ $match: {
"a" : {$exists:true},
"b" : {$exists:true},
}
},
{ $project: { _id : 0,name : 1,
r1: {$subtract:["$a", "$b"]} }
},
{ $limit: 100 },
]);
"result" : [
{
"name" : "xxxx1",
"r1" : -1
},
{
"name" : "xxxx0",
"r1" : -2
},
]
I cannot replicate your behaviour:
> db.tg.find()
{ "_id" : ObjectId("511223348a88785127a0d13f"), "a" : 1, "b" : 1, "name" : "xxxxx0" }
> db.tg.aggregate([{ $match: { "a" : {$exists:true}, "b" : {$exists:true} } }, { $project: { _id : 0,name : 1, r1: {$subtract:["$a", "$b"]} }}, { $limit: 100 }])
{ "result" : [ { "name" : "xxxxx0", "r1" : 0 } ], "ok" : 1 }
Can you give us a little more info like your MongoDB version?