Mongo Aggregate Group List / Filtered Results - mongodb

Here my records for mongdb
{ "_id" : 1,"userId" : "x", "name" : "Central", "borough": "Manhattan"},
{ "_id" : 2,"userId" : "x", "name" : "Rock", "borough" : "Queens"},
{ "_id" : 3,"userId" : "y", "name" : "Empire", "borough" : "Brooklyn"},
{ "_id" : 4,"userId" : "y", "name" : "Stana", "borough" : "Manhattan"},
{ "_id" : 5,"userId" : "y", "name" : "Jane", "borough" :"Brooklyn"}
how can we take result with aggregate by userId field like this
[
{
x : [{"_id":1,"name":"Central"},{"_id":2,"name":"Rock"}],
y:[{ "_id" : 3,"name":"Empire"},{ "_id" : 4,"name":"Stana"},{ "_id" : 5,"name":"Jane"}]
}
]

$group by userId and construct docs array with required fields
$group by null and construct array of docs in key-value format
$arrayToObject convert docs array to object
$replaceRoot to replace above converted object to root
db.collection.aggregate([
{
$group: {
_id: "$userId",
docs: {
$push: {
_id: "$_id",
name: "$name"
}
}
}
},
{
$group: {
_id: null,
docs: {
$push: {
k: "$_id",
v: "$docs"
}
}
}
},
{ $replaceRoot: { newRoot: { $arrayToObject: "$docs" } } }
])
Playground

Related

Filter by nested arrays/objects values (on different levels) and $push by multiple level - MongoDB Aggregate

I have a document with multiple level of embedded subdocument each has some nested array. Using $unwind and sort, do sorting based on day in descending and using push to combine each row records into single array. This Push is working only at one level means it allows only one push. If want to do the same things on the nested level and retains the top level data, got "errmsg" : "Unrecognized expression '$push'".
{
"_id" : ObjectId("5f5638d0ff25e01482432803"),
"name" : "XXXX",
"mobileNo" : 323232323,
"payroll" : [
{
"_id" : ObjectId("5f5638d0ff25e01482432801"),
"month" : "Jan",
"salary" : 18200,
"payrollDetails" : [
{
"day" : "1",
"salary" : 200,
},
{
"day" : "2",
"salary" : 201,
}
]
},
{
"_id" : ObjectId("5f5638d0ff25e01482432802"),
"month" : "Feb",
"salary" : 8300,
"payrollDetails" : [
{
"day" : "1",
"salary" : 300,
},
{
"day" : "2",
"salary" : 400,
}
]
}
],
}
Expected Result:
{
"_id" : ObjectId("5f5638d0ff25e01482432803"),
"name" : "XXXX",
"mobileNo" : 323232323,
"payroll" : [
{
"_id" : ObjectId("5f5638d0ff25e01482432801"),
"month" : "Jan",
"salary" : 18200,
"payrollDetails" : [
{
"day" : "2",
"salary" : 201
},
{
"day" : "1",
"salary" : 200
}
]
},
{
"_id" : ObjectId("5f5638d0ff25e01482432802"),
"month" : "Feb",
"salary" : 8300,
"payrollDetails" : [
{
"day" : "2",
"salary" : 400
},
{
"day" : "1",
"salary" : 300
}
]
}
],
}
Just day will be sorted and remaining things are same
I have tried but it got unrecognized expression '$push'
db.employee.aggregate([
{$unwind: '$payroll'},
{$unwind: '$payroll.payrollDetails'},
{$sort: {'payroll.payrollDetails.day': -1}},
{$group: {_id: '$_id', payroll: {$push: {payrollDetails:{$push:
'$payroll.payrollDetails'} }}}}])
It requires two time $group, you can't use $push operator two times in a field,
$group by main id and payroll id, construct payrollDetails array
$sort by payroll id (you can skip if not required)
$group by main id and construct payroll array
db.employee.aggregate([
{ $unwind: "$payroll" },
{ $unwind: "$payroll.payrollDetails" },
{ $sort: { "payroll.payrollDetails.day": -1 } },
{
$group: {
_id: {
_id: "$_id",
pid: "$payroll._id"
},
name: { $first: "$name" },
mobileNo: { $first: "$mobileNo" },
payrollDetails: { $push: "$payroll.payrollDetails" },
month: { $first: "$payroll.month" },
salary: { $first: "$payroll.salary" }
}
},
{ $sort: { "payroll._id": -1 } },
{
$group: {
_id: "$_id._id",
name: { $first: "$name" },
mobileNo: { $first: "$mobileNo" },
payroll: {
$push: {
_id: "$_id.pid",
month: "$month",
salary: "$salary",
payrollDetails: "$payrollDetails"
}
}
}
}
])
Playground

Group by array element in Mongodb

We have nested document and trying to group by array element. Our document structure looks like
/* 1 */
{
"_id" : ObjectId("5a690a4287e0e50010af1432"),
"slug" : [
"true-crime-the-10-most-infamous-american-murder-mysteries",
"10-most-infamous-american-murder-mysteries"
],
"tags" : [
{
"id" : "59244aa6b1be5055278e9b5b",
"name" : "true crime",
"_id" : "59244aa6b1be5055278e9b5b"
},
{
"id" : "5924524db1be5055278ebd6e",
"name" : "Occult Museum",
"_id" : "5924524db1be5055278ebd6e"
},
{
"id" : "5a690f0fc1a72100110c2656",
"_id" : "5a690f0fc1a72100110c2656",
"name" : "murder mysteries"
},
{
"id" : "59244d71b1be5055278ea654",
"name" : "unsolved murders",
"_id" : "59244d71b1be5055278ea654"
}
]
}
We want to find list of all slugs group by tag name. I am trying with following and it gets result but it isn't accurate. We have hundreds of records with each tag but i only get few with my query. I am not sure what i am doing wrong here.
Thanks in advance.
// Requires official MongoShell 3.6+
db.getCollection("test").aggregate(
[
{
"$match" : {
"item_type" : "Post",
"site_id" : NumberLong(2),
"status" : NumberLong(1)
}
},
{$unwind: "$tags" },
{
"$group" : {
"_id" : {
"tags᎐name" : "$tags.name",
"slug" : "$slug"
}
}
},
{
"$project" : {
"tags.name" : "$_id.tags᎐name",
"slug" : "$_id.slug",
"_id" : NumberInt(0)
}
}
],
{
"allowDiskUse" : true
}
);
Expected output is
TagName Slug
----------
true crime "true-crime-the-10-most-infamous-american-murder-mysteries",
"10-most-infamous-american-murder-mysteries"
"All records where tags true crime"
Instead of using slug as a part of _id you should use $push or $addToSet to accumulate them, try:
db.test.aggregate([
{
$unwind: "$tags"
},
{
$unwind: "$slug"
},
{
$group: {
_id: "$tags.name",
slugs: { $addToSet: "$slug" }
}
},
{
$project: {
_id: 1,
slugs: {
$reduce: {
input: "$slugs",
initialValue: "",
in: {
$concat: [ "$$value", ",", "$$this" ]
}
}
}
}
}
])
EDIT: to get comma separated string for slugs you can use $reduce with $concat
Output:
{ "_id" : "murder mysteries", "slugs" : ",10-most-infamous-american-murder-mysteries,true-crime-the-10-most-infamous-american-murder-mysteries" }
{ "_id" : "Occult Museum", "slugs" : ",10-most-infamous-american-murder-mysteries,true-crime-the-10-most-infamous-american-murder-mysteries" }
{ "_id" : "unsolved murders", "slugs" : ",10-most-infamous-american-murder-mysteries,true-crime-the-10-most-infamous-american-murder-mysteries" }
{ "_id" : "true crime", "slugs" : ",10-most-infamous-american-murder- mysteries,true-crime-the-10-most-infamous-american-murder-mysteries" }

Get average per year and label?

I can use this query to get the average sqmPrice for a myArea
db.getCollection('sold').aggregate([
{$match:{}},
{$group: {_id: "$myArea", "sqmPrice": {$avg: "$sqmPrice"} }}
])
Output:
[
{
"_id" : "Yttre Aspudden",
"sqmPrice" : 48845.7777777778
},
{
"_id" : "Hägerstensåsen",
"sqmPrice" : 120
}
]
I would like to group this by year, ideally an object that looks like this:
{
"Yttre Aspudden": {
2008: 1232,
2009: 1244
...
}
...
}
but the formatting is not the most important.
Here is a sample object, I would like to use soldDate:
{
"_id" : ObjectId("5beca41c78f21248ab47f4a6"),
"location" : {
"address" : {
"streetAddress" : "Ljusstöparbacken 26C"
},
"position" : {
"latitude" : 59.31427884,
"longitude" : 18.00892421
},
"namedAreas" : [
"Hägersten-Liljeholmen"
],
"region" : {
"municipalityName" : "Stockholm",
"countyName" : "Stockholms län"
},
"distance" : {
"ocean" : 3777
}
},
"listPrice" : 1895000,
"rent" : 1959,
"floor" : 1,
"livingArea" : 38.5,
"source" : {
"name" : "Fastighetsbyrån",
"id" : 1573,
"type" : "Broker",
"url" : "http://www.fastighetsbyran.se/"
},
"rooms" : 1.5,
"published" : ISODate("2018-11-02T20:55:19.000Z"),
"constructionYear" : 1959,
"objectType" : "Lägenhet",
"booliId" : 3278478,
"soldDate" : ISODate("2018-11-14T00:00:00.000Z"),
"soldPrice" : 2620000,
"soldPriceSource" : "bid",
"url" : "https://www.booli.se/annons/3278478",
"publishedDays" : 1735,
"soldDays" : 1747,
"daysUp" : 160,
"street" : "Ljusstöparbacken",
"streetYear" : "Ljusstöparbacken Hägersten-Liljeholmen 1959",
"yearDay" : 318,
"yearWeek" : 46,
"roughSize" : 40,
"sqmPrice" : 49221,
"myArea" : "Gröndal",
"hotlist" : true
}
You need to generate your keys dynamically so you have to use $arrayToObject. To build an object which aggregates the data you need three $group stages and to create new root of your document you can use $replaceRoot, try:
db.sold.aggregate([
{ $group: {_id: { area: "$myArea", year: { $year: "$soldDate" } }, "sqmPrice": {$avg: "$sqmPrice"} }},
{ $group: { _id: "$_id.area", avgs: { $push: { k: { $toString: "$_id.year" }, v: "$sqmPrice" } } } },
{ $group: { _id: null, areas: { $push: { k: "$_id", v: { $arrayToObject: "$avgs" } } } } },
{ $replaceRoot: { newRoot: { $arrayToObject: "$areas" } } }
])

How to find duplicate entries in collection using aggregation

{ "_id" : ObjectId("4f127fa55e7242718200002d"), "id":1, "name" : "foo"}
{ "_id" : ObjectId("4f127fa55e7242718200002d"), "id":2, "name" : "bar"}
{ "_id" : ObjectId("4f127fa55e7242718200002d"), "id":3, "name" : "baz"}
{ "_id" : ObjectId("4f127fa55e7242718200002d"), "id":4, "name" : "foo"}
{ "_id" : ObjectId("4f127fa55e7242718200002d"), "id":5, "name" : "bar"}
{ "_id" : ObjectId("4f127fa55e7242718200002d"), "id":6, "name" : "bar"}
I want to find all the duplicated entries in this collection by the "name" field using aggregation. E.g. "foo" appears twice and "bar" appears 3 times.
You can use group stage in aggrgation
db.collection.aggregate([{
$group: {
_id: "$name",
count: { $sum: 1 },
name: { $first: "$name" }
}
}])
You can group by name and count. And then filter with a count greater that 1.
db.collection.aggregate([
{
$group: {
_id: "$name",
count: { $sum: 1 }
}
},
{
$match:{count:{$gt:1}}
}
])
Output:
{ "_id" : "foo", "count":2}
{ "_id" : "bar", "count":3}

MongoDb get distinct items after grouping

I'm using mongodb with the following collection sample
{
"_id" : ObjectId("5703750ca9c436386c4814c9"),
"user_id" : NumberLong(17),
"activitytype_id" : NumberLong(1),
"created_date" : ISODate("2015-10-03T03:52:03.000Z")
},
{
"_id" : ObjectId("5703750ca9c436386c4814ca"),
"s_id" : NumberLong(132919),
"user_id" : NumberLong(17),
"activitytype_id" : NumberLong(4),
"created_date" : ISODate("2016-03-18T17:13:43.000Z")
},
{
"_id" : ObjectId("5703750ca9c436386c4814cb"),
"s_id" : NumberLong(215283),
"user_id" : NumberLong(17),
"activitytype_id" : NumberLong(4),
"created_date" : ISODate("2015-10-03T04:12:33.000Z")
}
,
{
"_id" : ObjectId("5703750ca9c436386c4814cc"),
"s_id" : NumberLong(360888),
"user_id" : NumberLong(17),
"activitytype_id" : NumberLong(4),
"created_date" : ISODate("2015-10-03T04:12:41.000Z")
}
This is my aggregation pipeline
db.activitylogs.aggregate([
{ $group: {
_id: {
user_id: "$user_id",
activitytype_id: "$activitytype_id"
},
activity_log_docs: {
$addToSet: {
s_id: "$s_id",
friend_id: "$friend_id",
playlist_id: "$playlist_id",
created_date:"$created_date"
}
}
}},
])
I need to get distinct s_id in activity_log_docs.
here is a screenshot for the result,
screen shot for the result
i need to avoid duplicated s_id in activity_log_docs array, so i will get distinct s_id
I think something like this should do :
db.activitylogs.aggregate([
{ $group: {
_id: {
user_id: "$user_id",
activitytype_id: "$activitytype_id" ,
s_id:"$s_id"
},
friend_id: {$first:"$friend_id"}}},
playlist_id: {$first:"$playlist_id"}}},
created_date: {$first:"$created_date"}}},
{ $group: {
_id: {
user_id: "$_id.user_id",
activitytype_id: "$_id.activitytype_id"
},
activity_log_docs: {
$addToSet: {
s_id: "$_id.s_id",
friend_id: "$friend_id",
playlist_id: "$playlist_id",
created_date:"$created_date"
}
}
}},
])
But please double check your own field's name.