Mongodb aggregation - sum a grouped, addToSet field - mongodb

I've got the following query:
db.listener.aggregate(
[
{ "$match" : { "location.countryName" : "Italy" } },
{ "$project" : { "location" : "$location"} },
{ "$group" : { "_id" : { "country": "$location.countryName", "city": "$location.cityName" }, "count" : { "$sum" : 1 }, "co-ords" : { "$addToSet" : { "lat" : "$location.latitude", "long" : "$location.longitude" } } } },
{ "$group" : { "_id" : "$_id.country", "cities" : { "$push" : { "city" : "$_id.city", "count" : "$count", "co-ords" : "$co-ords" } } } },
{ "$sort" : { "_id" : 1 } },
]
)
which give the following results (truncated):
{
"result" : [
{
"_id" : "Italy",
"cities" : [
{
"city" : "Seriate",
"count" : 1,
"co-ords" : [
{
"lat" : "45.6833",
"long" : "9.7167"
}
]
},
{
"city" : "Milan",
"count" : 3,
"co-ords" : [
{
"lat" : "45.4612",
"long" : "9.1878"
},
{
"lat" : "45.4667",
"long" : "9.2"
}
]
},
As you can see in the example for the city of Milan, the total city count is 3 but the number of longitude/latitude sets is two. This means that one of those more precise locations has two instances and the other has one.
I now need to amend my query to reflect the number of instances per latitude/longitude as well as the overall count.
It might look something like this:
{
"city" : "Milan",
"count" : 3,
"co-ords" : [
{
"lat" : "45.4612",
"long" : "9.1878",
"total" : 2
},
{
"lat" : "45.4667",
"long" : "9.2",
"total" : 1
}
]
},
I've tried a few things to achieve this but it never come out as I'd like or Mongo throws an error.
Anyone know the best way to do this?
Many thanks,
Nick.

db.listener.aggregate(
[
{ "$match" : { "location.countryName" : "Italy" } },
{ "$group" : { "_id" : { "country": "$location.countryName",
"city": "$location.cityName",
"coords": { "lat" : "$location.latitude", "long" : "$location.longitude" }
},
"count" : { "$sum" : 1 }
}
},
{ "$group" : { "_id" : { "country": "$_id.country", "city": "$_id.city" },
"coords": { "$addToSet" : { "long" : "$_id.coords.long",
"lat" : "$_id.coords.lat",
"total" : "$count"
}
},
"count" : { "$sum" : "$count" }
}
},
{ "$group" : { "_id" : "$_id.country",
"cities" : { "$push" : { "city" : "$_id.city",
"count" : "$count",
"coords" : "$coords" } } } },
{ "$sort" : { "_id" : 1 } },
]);
Sample output on your data from this:
{ "_id" : "Italy",
"cities" : [
{
"city" : "Seriate",
"count" : 1,
"coords" : [
{
"long" : "9.7167",
"lat" : "45.6833",
"total" : 1
}
]
},
{
"city" : "Milan",
"count" : 3,
"coords" : [
{
"long" : "9.1878",
"lat" : "45.4612",
"total" : 1
},
{
"long" : "9.2",
"lat" : "45.4667",
"total" : 2
}
]
}
]
}

Related

Problems aggregating MongoDB

I am having problems aggregating my Product Document in MongoDB.
My Product Document is:
{
"_id" : ObjectId("5d81171c2c69f45ef459e0af"),
"type" : "T-Shirt",
"name" : "Panda",
"description" : "Panda's are cool.",
"image" : ObjectId("5d81171c2c69f45ef459e0ad"),
"created_at" : ISODate("2019-09-17T18:25:48.026+01:00"),
"is_featured" : false,
"sizes" : [
"XS",
"S",
"M",
"L",
"XL"
],
"tags" : [ ],
"pricing" : {
"price" : 26,
"sale_price" : 8
},
"categories" : [
ObjectId("5d81171b2c69f45ef459e086"),
ObjectId("5d81171b2c69f45ef459e087")
],
"sku" : "5d81171c2c69f45ef459e0af"
},
And my Category Document is:
{
"_id" : ObjectId("5d81171b2c69f45ef459e087"),
"name" : "Art",
"description" : "These items are our artsy options.",
"created_at" : ISODate("2019-09-17T18:25:47.196+01:00")
},
My aim is to perform aggregation on the Product Document in order to count the number of items within each Category. So I have the Category "Art", I need to count the products are in the "Art" Category:
My current aggregate:
db.product.aggregate(
{ $unwind : "$categories" },
{
$group : {
"_id" : { "name" : "$name" },
"doc" : { $push : { "category" : "$categories" } },
}
},
{ $unwind : "$doc" },
{
$project : {
"_id" : 0,
"name" : "$name",
"category" : "$doc.category"
}
},
{
$group : {
"_id" : "$category",
"name": { "$first": "$name" },
"items_in_cat" : { $sum : 1 }
}
},
{ "$sort" : { "items_in_cat" : -1 } },
)
Which does actually work but not as I need:
{
"_id" : ObjectId("5d81171b2c69f45ef459e082"),
"name" : null, // Why is the name of the category no here?
"items_in_cat" : 4
},
As we can see the name is null. How can I aggregate the output to be:
{
"_id" : ObjectId("5d81171b2c69f45ef459e082"),
"name" : "Art",
"items_in_cat" : 4
},
We need to use $lookup to fetch the name from Category collection.
The following query can get us the expected output:
db.product.aggregate([
{
$unwind:"$categories"
},
{
$group:{
"_id":"$categories",
"items_in_cat":{
$sum:1
}
}
},
{
$lookup:{
"from":"category",
"let":{
"id":"$_id"
},
"pipeline":[
{
$match:{
$expr:{
$eq:["$_id","$$id"]
}
}
},
{
$project:{
"_id":0,
"name":1
}
}
],
"as":"categoryLookup"
}
},
{
$unwind:{
"path":"$categoryLookup",
"preserveNullAndEmptyArrays":true
}
},
{
$project:{
"_id":1,
"name":{
$ifNull:["$categoryLookup.name","NA"]
},
"items_in_cat":1
}
}
]).pretty()
Data set:
Collection: product
{
"_id" : ObjectId("5d81171c2c69f45ef459e0af"),
"type" : "T-Shirt",
"name" : "Panda",
"description" : "Panda's are cool.",
"image" : ObjectId("5d81171c2c69f45ef459e0ad"),
"created_at" : ISODate("2019-09-17T17:25:48.026Z"),
"is_featured" : false,
"sizes" : [
"XS",
"S",
"M",
"L",
"XL"
],
"tags" : [ ],
"pricing" : {
"price" : 26,
"sale_price" : 8
},
"categories" : [
ObjectId("5d81171b2c69f45ef459e086"),
ObjectId("5d81171b2c69f45ef459e087")
],
"sku" : "5d81171c2c69f45ef459e0af"
}
Collection: category
{
"_id" : ObjectId("5d81171b2c69f45ef459e086"),
"name" : "Art",
"description" : "These items are our artsy options.",
"created_at" : ISODate("2019-09-17T17:25:47.196Z")
}
{
"_id" : ObjectId("5d81171b2c69f45ef459e087"),
"name" : "Craft",
"description" : "These items are our artsy options.",
"created_at" : ISODate("2019-09-17T17:25:47.196Z")
}
Output:
{
"_id" : ObjectId("5d81171b2c69f45ef459e087"),
"items_in_cat" : 1,
"name" : "Craft"
}
{
"_id" : ObjectId("5d81171b2c69f45ef459e086"),
"items_in_cat" : 1,
"name" : "Art"
}

How to return all project employees?

I have datas of following format collection(projects) inside my database:
{ "_id" : ObjectId("5981a80f223e491a58230e5d"), "id" : 2, "name" : "gbqplhlqxzwl", "managerId" : 65151, "startDate" : "03.11.1999", "finishDate" : "02.01.2003", "projectStatus" : "POSTPONED", "participants" : [ ], "estimatedBudget" : 6017891.811079914 }
{ "_id" : ObjectId("5981a80f223e491a58230e5e"), "id" : 3, "name" : "erfekfsdgryu", "managerId" : 83749, "startDate" : "07.07.2007", "finishDate" : "26.12.2027", "projectStatus" : "POSTPONED", "participants" : [ 19229, 81856, 79270, 5509, 70344, 39424 ], "estimatedBudget" : 3086213.8981674756 }
{ "_id" : ObjectId("5981a80f223e491a58230e5f"), "id" : 1, "name" : "jvbzobhppntd", "managerId" : 18925, "startDate" : "29.04.1999", "finishDate" : "13.10.2008", "projectStatus" : "OPEN", "participants" : [ 46100, 96968, 6676, 56121, 4716, 68901, 43990, 48587, 62547, 30292, 65153, 17551, 27083, 20261, 27097, 50036, 86585, 69890, 18790, 22592, 60774, 93709, 78471, 27157, 4328, 36501, 47296, 16831 ], "estimatedBudget" : 3581496.7068344904 }
{ "_id" : ObjectId("5981a80f223e491a58230e60"), "id" : 4, "name" : "cdspkkqwvwld", "managerId" : 62042, "startDate" : "13.03.1998", "finishDate" : "20.06.2007", "projectStatus" : "OPEN", "participants" : [ 53480, 60897, 23677, 22064, 60807, 66637, 84609, 28378, 87143, 27675, 79283, 94992, 20429, 48769, 91671, 41747, 21651, 91134, 41684, 57228, 51949, 18756, 45679, 87781, 67287, 6902, 27526 ], "estimatedBudget" : 2126283.953787842 }
....
I need to find the busiest employee and list all his projects.
participants array contains employee ids who participate in the project.
I use the following query to find the busiest employee:
db.projects.aggregate(
{
$unwind: '$participants'
},
{
$addFields: {
count: 1
}
},
{
$group: {
_id : '$participants',
participation_count : {
'$sum':'$count'
}
}
},
{
$sort:{participation_count:-1}
},
{
$limit:1
}
)
and this work correctly. But I have no ideas how to list all his projects.
any ideas?
db.projects.aggregate(
[
{
$unwind: '$participants'
},
{
$addFields: {
count: 1
}
},
{
$group: {
_id : '$participants',
participation_count : {'$sum':'$count'},
projectId : {$push: '$id'}
}
},
{
$sort:{participation_count:-1}
},
{
$limit:1
}
],
{
allowDiskUse:true
}
)

mongodb aggregation $group and then $push a object

this is my data :
> db.bookmarks.find({"userId" : "56b9b74bf976ab70ff6b9999"}).pretty()
{
"_id" : ObjectId("56c2210fee4a33579f4202dd"),
"userId" : "56b9b74bf976ab70ff6b9999",
"items" : [
{
"itemId" : "28",
"timestamp" : "2016-02-12T18:07:28Z"
},
{
"itemId" : "29",
"timestamp" : "2016-02-12T18:07:29Z"
},
{
"itemId" : "30",
"timestamp" : "2016-02-12T18:07:30Z"
},
{
"itemId" : "31",
"timestamp" : "2016-02-12T18:07:31Z"
},
{
"itemId" : "32",
"timestamp" : "2016-02-12T18:07:32Z"
},
{
"itemId" : "33",
"timestamp" : "2016-02-12T18:07:33Z"
},
{
"itemId" : "34",
"timestamp" : "2016-02-12T18:07:34Z"
}
]
}
I want to have something like (actually i hope the _id can become userId too) :
{
"_id" : "56b9b74bf976ab70ff6b9999",
"items" : [
{ "itemId": "32", "timestamp": "2016-02-12T18:07:32Z" },
{ "itemId": "31", "timestamp": "2016-02-12T18:07:31Z" },
{ "itemId": "30", "timestamp": "2016-02-12T18:07:30Z" }
]
}
What I have now :
> db.bookmarks.aggregate(
... { $match: { "userId" : "56b9b74bf976ab70ff6b9999" } },
... { $unwind: '$items' },
... { $sort: { 'items.timestamp': -1} },
... { $skip: 2 },
... { $limit: 3},
... { $group: { '_id': '$userId' , items: { $push: '$items.itemId' } } }
... ).pretty()
{ "_id" : "56b9b74bf976ab70ff6b9999", "items" : [ "32", "31", "30" ] }
i tried to read the document in mongo and find out i can $push, but somehow i cannot find a way to push such object, which is not defined anywhere in the whole object. I want to have the timestamp also.. but i don't know how should i modified the $group (or others??) to do so. thanks for helping!
This code, which I tested in the MongoDB 3.2.1 shell, should give you the output format that you want:
> db.bookmarks.aggregate(
{ "$match" : { "userId" : "Ursula" } },
{ "$unwind" : "$items" },
{ "$sort" : { "items.timestamp" : -1 } },
{ "$skip" : 2 },
{ "$limit" : 3 },
{ "$group" : { "_id" : "$userId", items: { "$push" : { "myPlace" : "$items.itemId", "myStamp" : "$items.timestamp" } } } } ).pretty()
Running the above will produce this output:
{
"_id" : "Ursula",
"items" : [
{
"myPlace" : "52",
"myStamp" : ISODate("2016-02-13T18:07:32Z")
},
{
"myPlace" : "51",
"myStamp" : ISODate("2016-02-13T18:07:31Z")
},
{
"myPlace" : "50",
"myStamp" : ISODate("2016-02-13T18:07:30Z")
}
]
}
In MongoDB version 3.2.x, you can also use the $out operator in the very last stage of the aggregation pipeline, and have the output of the aggregation query written to a collection. Here is the code I used:
> db.bookmarks.aggregate(
{ "$match" : { "userId" : "Ursula" } },
{ "$unwind" : "$items" },
{ "$sort" : { "items.timestamp" : -1 } },
{ "$skip" : 2 },
{ "$limit" : 3 },
{ "$group" : { "_id" : "$userId", items: { "$push" : { "myPlace" : "$items.itemId", "myStamp" : "$items.timestamp" } } } },
{ "$out" : "ursula" } )
This gives me a collection named "ursula":
> show collections
ursula
and I can query that collection:
> db.ursula.find().pretty()
{
"_id" : "Ursula",
"items" : [
{
"myPlace" : "52",
"myStamp" : ISODate("2016-02-13T18:07:32Z")
},
{
"myPlace" : "51",
"myStamp" : ISODate("2016-02-13T18:07:31Z")
},
{
"myPlace" : "50",
"myStamp" : ISODate("2016-02-13T18:07:30Z")
}
]
}
>
Last of all, this is the input document I used in the aggregation query. You can compare this document to how I coded the aggregation query to see how I built the new items array.
> db.bookmarks.find( { "userId" : "Ursula" } ).pretty()
{
"_id" : ObjectId("56c240ed55f2f6004dc3b25c"),
"userId" : "Ursula",
"items" : [
{
"itemId" : "48",
"timestamp" : ISODate("2016-02-13T18:07:28Z")
},
{
"itemId" : "49",
"timestamp" : ISODate("2016-02-13T18:07:29Z")
},
{
"itemId" : "50",
"timestamp" : ISODate("2016-02-13T18:07:30Z")
},
{
"itemId" : "51",
"timestamp" : ISODate("2016-02-13T18:07:31Z")
},
{
"itemId" : "52",
"timestamp" : ISODate("2016-02-13T18:07:32Z")
},
{
"itemId" : "53",
"timestamp" : ISODate("2016-02-13T18:07:33Z")
},
{
"itemId" : "54",
"timestamp" : ISODate("2016-02-13T18:07:34Z")
}
]
}

MongoDB Aggregation - return default value for documents that don't match query

I'm having trouble figuring out the right aggregation pipe operations to return the results I need.
I have a collection similar to the following :-
{
"_id" : "writer1",
"Name" : "writer1",
"Website" : "website1",
"Reviews" : [
{
"Film" : {
"Name" : "Jurassic Park",
"Genre" : "Action"
},
"Score" : 4
},
{
"Technology" : {
"Name" : "Mad Max",
"Genre" : "Action"
},
"Score" : 5
}
]
}
{
"_id" : "writer2",
"Name" : "writer2",
"Website" : "website1",
"Reviews" : [
{
"Technology" : {
"Name" : "Mad Max",
"Genre" : "Action"
},
"Score" : 5
}
]
}
And this is my aggregation so far : -
db.writers.aggregate([
{ "$unwind" : "$Reviews" },
{ "$match" : { "Reviews.Film.Name" : "Jurassic Park" } },
{ "$group" : { "_id" : "$Website" , "score" : { "$avg" : "$Reviews.Score" },
writers :{ $push: { name:"$Name", score:"$Reviews.Score" } }
}}
])
This returns only writers who have a review of the matching film and also only websites that have at least 1 writer who has reviewed the film,
however, I need to return all websites containing a list of their all writers, with a score of 0 if they haven't written a review for the specified film.
so, I am currently getting : -
{ "_id" : "website1", "score" : 4, "writers" : [ { "name" : "writer1", "score" : 4 } ] }
When I actually need : -
{ "_id" : "website1", "score" : 2, "writers" : [ { "name" : "writer1", "score" : 4 },{ "name" :"writer2", "score" : 0 } ] }
Can anyone point me in the right direction?
Cheers

How to find count of a key in one document using mongoDB?

I have following structure in my collection:
users:[
{
"name":"ABC",
"address":{
"city":"London",
"country":"UK",
}
},
{
"name":"XYZ",
"address":{
"city":"London",
"country":"UK",
}
},
{
"name":"PQR",
"address":{
"city":"NewYork",
"country":"US",
}
}
]
I want count of number of occurrences of 'city' key in 'address' and 'name' as a result.
I want to query above collection and want following output:
[{
"name":"ABC",
"city":"London",
"count":2
},{
"name":"XYZ",
"city":"London",
"count":2
}, {
"name":"PQR",
"city":"NewYork",
"count":1
}
]
I simulated your collection
{
"_id" : ObjectId("547c30ae371ea419f07b9550"),
"users" : [
{
"name" : "ABC",
"address" : {
"city" : "London",
"country" : "UK"
}
},
{
"name" : "XYZ",
"address" : {
"city" : "London",
"country" : "UK"
}
},
{
"name" : "PQR",
"address" : {
"city" : "NewYork",
"country" : "US"
}
}
]
}
And then I use aggregate framework
db.coll.aggregate([
{
$unwind:"$users"
},
{
$group:{
_id:"$users.address.city",
name:{$push:"$users.name"},
city:{$first:"$users.address.city"},
count:{$sum:1}
}
},{
$unwind:"$name"
},{
$project:{
_id:0,
"city":"$_id",
"name":1,
"city":1,
"count":1
}
}])
result:
{
"result" : [
{
"name" : "PQR",
"city" : "NewYork",
"count" : 1
},
{
"name" : "ABC",
"city" : "London",
"count" : 2
},
{
"name" : "XYZ",
"city" : "London",
"count" : 2
}
],
"ok" : 1
}
UPDATE AFTER QUESTION
I added a new Document
{
"_id" : ObjectId("547c394c371ea419f07b9551"),
"users" : [
{
"address" : {
"city" : "Livorno",
"country" : "LI"
}
},
{
"address" : {
"city" : "Livorno",
"country" : "LI"
}
},
{
"address" : {
"city" : "NewYork",
"country" : "US"
}
}
]
}
and new Query
db.coll.aggregate([
{
$unwind:"$users"
},
{
$group:{
_id:"$users.address.city",
"name": {
$push:{"$ifNull": ["$users.name","$_id"]}
},
city:{$first:"$users.address.city"},
count:{$sum:1}
}
},{
$unwind:"$name"
},{
$project:{
_id:0,
"city":"$_id",
"name":1,
"city":1,
"count":1
}
}])
Result:
{
"result" : [
{
"name" : "PQR",
"city" : "NewYork",
"count" : 2
},
{
"name" : ObjectId("547c394c371ea419f07b9551"),
"city" : "NewYork",
"count" : 2
},
{
"name" : ObjectId("547c394c371ea419f07b9551"),
"city" : "Livorno",
"count" : 2
},
{
"name" : ObjectId("547c394c371ea419f07b9551"),
"city" : "Livorno",
"count" : 2
},
{
"name" : "ABC",
"city" : "London",
"count" : 2
},
{
"name" : "XYZ",
"city" : "London",
"count" : 2
}
],
"ok" : 1
}