MongoDB aggregation query based on multiple fields with similar values - mongodb

I have documents that look like this:
{
"_id" : "001",
"a" : {
"b" : {
"c" : {
"custId" : "cust1"
},
"d" : {
"custId" : "cust2"
}
}
}
}
{
"_id" : "002",
"a" : {
"b" : {
"c" : {
"custId" : "cust1"
},
"d" : {
"custId" : "cust3"
}
}
}
}
{
"_id" : "003",
"a" : {
"b" : {
"c" : {
"custId" : null
},
"d" : {
"custId" : "cust2"
}
}
}
}
{
"_id" : "004",
"a" : {
"b" : {
"c" : {
"custId" : null
},
"d" : {
"custId" : "cust1"
}
}
}
}
I would like to obtain an aggregation which shows a sorted count of customer ids, ignoring null customer ids, like this:
{
"_id" : "cust1",
"count" : 3,
"records" : [
"001", "002", "004"
]
}
{
"_id" : "cust2",
"count" : 2,
"records" : [
"001", "003"
]
}
{
"_id" : "cust3",
"count" : 1,
"records" : [
"002"
]
}
I think each document needs to be broken down into 1 or 2 customer based arrays than then unwound back into documents, but I have been unable to determine a workable solution.

make an array of custId, $map to iterate loop of b after converting from object to array using $objectToArray
$unwind deconstruct custIds array
$match to filter none null custIds documents
$group by custIds and get count of total records and make unique array of _id using $addToset
db.collection.aggregate([
{
$project: {
custIds: {
$map: {
input: { $objectToArray: "$a.b" },
in: "$$this.v.custId"
}
}
}
},
{ $unwind: "$custIds" },
{ $match: { custIds: { $ne: null } } },
{
$group: {
_id: "$custIds",
count: { $sum: 1 },
records: { $addToSet: "$_id" }
}
}
])
Playground

Related

Group by array element in Mongodb

We have nested document and trying to group by array element. Our document structure looks like
/* 1 */
{
"_id" : ObjectId("5a690a4287e0e50010af1432"),
"slug" : [
"true-crime-the-10-most-infamous-american-murder-mysteries",
"10-most-infamous-american-murder-mysteries"
],
"tags" : [
{
"id" : "59244aa6b1be5055278e9b5b",
"name" : "true crime",
"_id" : "59244aa6b1be5055278e9b5b"
},
{
"id" : "5924524db1be5055278ebd6e",
"name" : "Occult Museum",
"_id" : "5924524db1be5055278ebd6e"
},
{
"id" : "5a690f0fc1a72100110c2656",
"_id" : "5a690f0fc1a72100110c2656",
"name" : "murder mysteries"
},
{
"id" : "59244d71b1be5055278ea654",
"name" : "unsolved murders",
"_id" : "59244d71b1be5055278ea654"
}
]
}
We want to find list of all slugs group by tag name. I am trying with following and it gets result but it isn't accurate. We have hundreds of records with each tag but i only get few with my query. I am not sure what i am doing wrong here.
Thanks in advance.
// Requires official MongoShell 3.6+
db.getCollection("test").aggregate(
[
{
"$match" : {
"item_type" : "Post",
"site_id" : NumberLong(2),
"status" : NumberLong(1)
}
},
{$unwind: "$tags" },
{
"$group" : {
"_id" : {
"tags᎐name" : "$tags.name",
"slug" : "$slug"
}
}
},
{
"$project" : {
"tags.name" : "$_id.tags᎐name",
"slug" : "$_id.slug",
"_id" : NumberInt(0)
}
}
],
{
"allowDiskUse" : true
}
);
Expected output is
TagName Slug
----------
true crime "true-crime-the-10-most-infamous-american-murder-mysteries",
"10-most-infamous-american-murder-mysteries"
"All records where tags true crime"
Instead of using slug as a part of _id you should use $push or $addToSet to accumulate them, try:
db.test.aggregate([
{
$unwind: "$tags"
},
{
$unwind: "$slug"
},
{
$group: {
_id: "$tags.name",
slugs: { $addToSet: "$slug" }
}
},
{
$project: {
_id: 1,
slugs: {
$reduce: {
input: "$slugs",
initialValue: "",
in: {
$concat: [ "$$value", ",", "$$this" ]
}
}
}
}
}
])
EDIT: to get comma separated string for slugs you can use $reduce with $concat
Output:
{ "_id" : "murder mysteries", "slugs" : ",10-most-infamous-american-murder-mysteries,true-crime-the-10-most-infamous-american-murder-mysteries" }
{ "_id" : "Occult Museum", "slugs" : ",10-most-infamous-american-murder-mysteries,true-crime-the-10-most-infamous-american-murder-mysteries" }
{ "_id" : "unsolved murders", "slugs" : ",10-most-infamous-american-murder-mysteries,true-crime-the-10-most-infamous-american-murder-mysteries" }
{ "_id" : "true crime", "slugs" : ",10-most-infamous-american-murder- mysteries,true-crime-the-10-most-infamous-american-murder-mysteries" }

How to take the duplicate records in mongodb

Here i have two document ,in this documents childNodes array ID is duplicate means , i want to take the userID and pedagogyID of the record,as per my documents second document under childNodes array 798 is coming duplicate, so i want to take the records
Documents
{
"userID" : "A",
"pedagogyID" : "100",
"summary" : {
"LearnProgress" : {
"childNodes" : [
{
"ID" : "123",
"status" : "in-progress"
},
{
"ID" : "456",
"status" : null
},
{
"ID" : "333",
"status" : null
}
],
}
}
}
{
"userID" : "B",
"pedagogyID" : "200",
"summary" : {
"LearnProgress" : {
"childNodes" : [
{
"ID" : "789",
"status" : "in-progress"
},
{
"ID" : "1010",
"status" : null
},
{
"ID" : "789",
"status" : null
}
],
}
}
}
Expected Output
{
"userID" : "B",
"pedagogyID" : "200",
}
MY Code
db.collectionname.aggregate(
[
{"$unwind":"$summary.LearnProgress.childNodes"},
{"$group":{
"_id":{"_id":"$_id","ID":"$summary.LearnProgress.childNodes.ID"},
"userID":{"$first":"$userID"},
"pedagogyID":{"$first":"$pedagogyID"},
"count":{"$sum":1}
}},
{"$match":{"count":{"$gt":1}}},
{"$group":{"_id":{"userID":"$userID","pedagogyID":"$pedagogyID"}}},
{"$replaceRoot":{"newRoot":"$_id"}}
],
{ allowDiskUse:true }
)
You can use below aggregation.
db.colname.aggregate([
{"$unwind":"$summary.LearnProgress.childNodes"},
{"$group":{
"_id":{"_id":"$_id","ID":"$summary.LearnProgress.childNodes.ID"},
"userID":{"$first":"$userID"},
"pedagogyID":{"$first":"$pedagogyID"},
"count":{"$sum":1}
}},
{"$match":{"count":{"$gt":1}}},
{"$group":{"_id":{"userID":"$userID","pedagogyID":"$pedagogyID"}}},
{"$replaceRoot":{"newRoot":"$_id"}}
],{"allowDiskUse":true})
db.collectionname.aggregate(
// Pipeline
[
// Stage 1
{
$unwind: {
path : "$summary.LearnProgress.childNodes",
}
},
// Stage 2
{
$group: {
_id:'$summary.LearnProgress.childNodes.ID',
count:{$sum:1},
pedagogyID:{$first:'$pedagogyID'},
userID:{$first:'$userID'}
}
},
// Stage 3
{
$match: {
count:{$gt:1}
}
},
// Stage 4
{
$project: {
userID:1,
pedagogyID:1,
_id:0
}
},
]
);

Using $match and $unwind to get a count

I am learning MongoDB aggregation pipeline, i tried to get matched count from output of $unwind and $group. I am able to see the results for $unwind and $group. But I am not sure why I didn't get the matched count. Please help to get percentage field greater than 25.
Here's an example document:
{
"_id":ObjectId("599e9dbd8fbad926e712f902"),
"sample":"1",
"attribute":[
{
"functionName":"1",
"percentage":31.6
}
]
}
I tried this:
db.docs3.aggregate({
$unwind:'$attribute'
},
{
$group:{
_id:{
func:"$attribute.functionName",
percen:"$attribute.percentage"
}
}
})
And got this output:
{ "_id" : { "func" : "7", "percen" : 30 } }
{ "_id" : { "func" : "5", "percen" : 23.1 } }
{ "_id" : { "func" : "8", "percen" : 27.8 } }
{ "_id" : { "func" : "6", "percen" : 32.1 } }
{ "_id" : { "func" : "1", "percen" : 31.6 } }
{ "_id" : { "func" : "2", "percen" : 35 } }
{ "_id" : { "func" : "3", "percen" : 7.1 } }
{ "_id" : { "func" : "4", "percen" : 31.6 } }
I tried this:
db.docs3.aggregate({
$unwind:'$attribute'
},
{
$group:{
_id:{
func:"$attribute.functionName",
percen:"$attribute.percentage"
}
}
},
{
$match:{
"attribute.percentage":{
$gt:25
}
}
})
And I got an error.
I'm not sure whether you want a count of
Documents having at least one attribute with percentage > 25
Or
Attributes having percentage > 25
If you are interested in No. 1 then you do not need to unwind the attribute array in order to apply your match. The following will return a count of documents containing at least one attribute with percentage > 25:
db.getCollection('other').aggregate([
{ $match: { 'attribute.percentage': { $gt: 25 } } },
{ $group: { _id: null, count: { $sum: 1 } } }
])
If you are interested in No. 2 then the following will return a count of attributes with percentage > 25:
db.getCollection('other').aggregate([
{ $unwind: '$attribute' },
{ $match: { 'attribute.percentage': { $gt: 25 } } },
{ $group: { _id: null, count: { $sum: 1 } } }
])
For the following documents:
{
"_id" : ObjectId("599e9dbd8fbad926e712f902"),
"sample" : "1",
"attribute" : [
{
"functionName" : "1",
"percentage" : 31.6
}
]
}
{
"_id" : ObjectId("59a54104e7e9cc2109863beb"),
"sample" : "1",
"attribute" : [
{
"functionName" : "2",
"percentage" : 21.2
}
]
}
{
"_id" : ObjectId("59a542c4e7e9cc2109863c45"),
"sample" : "1",
"attribute" : [
{
"functionName" : "2",
"percentage" : 20.2
},
{
"functionName" : "2",
"percentage" : 28.2
},
{
"functionName" : "2",
"percentage" : 35.2
}
]
}
The first command returns count=2, the second command returns count=3.
Use the money sign to get the current value, and use the same field name you used in your $group stage
db.docs3.aggregate({
$unwind:'$attribute'
},
{
$group:{
_id:{
func:"$attribute.functionName",
percen:"$attribute.percentage"
}
}
},
{
$match:{
"$_id.percen":{
$gt:25
}
}
})

how to get this query in mongoDB

I have this collection...
> db.banks.find().pretty()
{
"_id" : ObjectId("54f37cbb44aec3b01b7db8f4"),
"name" : "A",
"branches" : [
{
"branch_id" : 8561,
"name" : "X",
},
{
"branch_id" : 8576,
"name" : "Y",
}
]
}
{
"_id" : ObjectId("54f37cbb44aec3b01b7db8f5"),
"name" : "B",
"branches" : [
{
"branch_id" : 3238,
"name" : "Z",
}
]
}
with this command :
db.banks.aggregate({$project{"branches.name":1,"_id":0}});
get this result :
{ "branches" : { { "name" : "X" }, { "name" : "Y" } } }
{ "branches" : { { "name" : "Z" } } }
but; how I get this result?
(In fact, one object and without "branches".)
{{"name" : "X"}, {"name" : "Y"}, {"name" : "Z"}}
very thanks...
One way you could go about this is to do an $unwind first in the aggregation pipeline to get a deconstructed array with a document for each element and then group by the array element $branches.name:
db.banks.aggregate([
{ $unwind: '$branches'},
{
$group: {
_id: {
name: '$branches.name'
}
}
},
{
$project: {
_id: 0,
name: '$_id.name'
}
},
{ $sort : { "name" : 1 } }
])
Outputs:
{
"result" : [
{
"name" : "X"
},
{
"name" : "Y"
},
{
"name" : "Z"
}
],
"ok" : 1
}

How do I return multiple fields in a MongoDB aggregate query?

I have a collection of MongoDB documents that look like this:
{
"_id" : "123",
"created_by": "bob",
"date_added": ISODate("2014-08-27T17:43:23Z"),
"size": "XL",
"color": "red"
}
The question I'm trying to answer is: What is the color of the item most recently added by each person?
I've gotten this far:
db.stuff.aggregate([
{ $group: { _id: { who: "$created_by"}, added: { $max: "$date_added" } } },
])
{ "_id" : { "who" : "bob" }, "added" : ISODate("2014-09-30T07:06:38.135Z") }
{ "_id" : { "who" : "mike" }, "added" : ISODate("2014-09-30T07:10:03.098Z") }
{ "_id" : { "who" : "mary" }, "added" : ISODate("2014-09-30T07:07:27.787Z") }
{ "_id" : { "who" : "john" }, "added" : ISODate("2014-09-30T07:09:51.418Z") }
However, it only returns the user's name and the date of when the document was added. I can't figure out how to get the query to also return the color. Thank you!
I think this will work for you.
db.stuff.aggregate([ {
$sort : {
date_added : -1
}
}, {
$group : {
_id : {
who : "$created_by"
},
added : {
$first : "$date_added"
},
color : {
$first : "$color"
}
}
} ]);