Map aggregation results in Mongo - mongodb

Here is the data set:
{ "_id" : "1", "key" : "111", "payload" : 100, "type" : "foo", "createdAt" : ISODate("2016-07-08T11:59:18.000Z") }
{ "_id" : "2", "key" : "111", "payload" : 100, "type" : "bar", "createdAt" : ISODate("2016-07-09T11:59:19.000Z") }
{ "_id" : "3", "key" : "222", "payload" : 100, "type" : "foo", "createdAt" : ISODate("2016-07-10T11:59:20.000Z") }
{ "_id" : "4", "key" : "222", "payload" : 100, "type" : "foo", "createdAt" : ISODate("2016-07-11T11:59:21.000Z") }
{ "_id" : "5", "key" : "222", "payload" : 100, "type" : "bar", "createdAt" : ISODate("2016-07-12T11:59:22.000Z") }
I have to group them by key:
db.items.aggregate([{$group: {_id: {key: '$key'}}}])
that produces the next set:
{ "_id" : { "key" : "111" } }
{ "_id" : { "key" : "222" } }
And after that I have to retrieve the most recent values of foo and bar per each group record.
My question is what is the most optimal way to do it? I can iterate the items in javascript and perform additional roundtrip to DB per each group result. But I'm not sure if it's time-efficient.

I am not sure about the most optimal way to do it, but the easy one will be to expand your aggregation pipeline like
db.items.aggregate([
{
$group:
{
_id: { key: "$key", type: "$type" },
last: { $max: "$createdAt" }
}
},
{
$group:
{
_id: { key: "$_id.key" },
mostRecent: { $push: { type: "$_id.type", createdAt: "$last" } }
}
}
]);
that for your collection of documents will result into
{ "_id" : { "key" : "222" }, "mostRecent" : [ { "type" : "bar", "createdAt" : ISODate("2016-07-12T11:59:22Z") }, { "type" : "foo", "createdAt" : ISODate("2016-07-11T11:59:21Z") } ] }
{ "_id" : { "key" : "111" }, "mostRecent" : [ { "type" : "bar", "createdAt" : ISODate("2016-07-09T11:59:19Z") }, { "type" : "foo", "createdAt" : ISODate("2016-07-08T11:59:18Z") } ] }

Related

What is $$ROOT in MongoDB aggregate and how it works?

I am watching a tutorial I can understand how this aggregate works, What is the use of pings, $$ROOT in it.
client = pymongo.MongoClient(MY_URL)
pings = client['mflix']['watching_pings']
cursor = pings.aggregate([
{
"$sample": { "size": 50000 }
},
{
"$addFields": {
"dayOfWeek": { "$dayOfWeek": "$ts" },
"hourOfDay": { "$hour": "$ts" }
}
},
{
"$group": { "_id": "$dayOfWeek", "pings": { "$push": "$$ROOT" } }
},
{
"$sort": { "_id": 1 }
}
]);
Let's assume that our collection looks like below:
{
"_id" : ObjectId("b9"),
"key" : 1,
"value" : 20,
"history" : ISODate("2020-05-16T00:00:00Z")
},
{
"_id" : ObjectId("ba"),
"key" : 1,
"value" : 10,
"history" : ISODate("2020-05-13T00:00:00Z")
},
{
"_id" : ObjectId("bb"),
"key" : 3,
"value" : 50,
"history" : ISODate("2020-05-12T00:00:00Z")
},
{
"_id" : ObjectId("bc"),
"key" : 2,
"value" : 0,
"history" : ISODate("2020-05-13T00:00:00Z")
},
{
"_id" : ObjectId("bd"),
"key" : 2,
"value" : 10,
"history" : ISODate("2020-05-16T00:00:00Z")
}
Now based on the history field you want to group and insert the whole documents in to an array field 'items'. Here $$ROOT variable will be helpful.
So, the aggregation query to achieve the above will be:
db.collection.aggregate([{
$group: {
_id: '$history',
items: {$push: '$$ROOT'}
}
}])
It will result in following output:
{
"_id" : ISODate("2020-05-12T00:00:00Z"),
"items" : [
{
"_id" : ObjectId("bb"),
"key" : 3,
"value" : 50,
"history" : ISODate("2020-05-12T00:00:00Z")
}
]
},
{
"_id" : ISODate("2020-05-13T00:00:00Z"),
"items" : [
{
"_id" : ObjectId("ba"),
"key" : 1,
"value" : 10,
"history" : ISODate("2020-05-13T00:00:00Z")
},
{
"_id" : ObjectId("bc"),
"key" : 2,
"value" : 0,
"history" : ISODate("2020-05-13T00:00:00Z")
}
]
},
{
"_id" : ISODate("2020-05-16T00:00:00Z"),
"items" : [
{
"_id" : ObjectId("b9"),
"key" : 1,
"value" : 20,
"history" : ISODate("2020-05-16T00:00:00Z")
},
{
"_id" : ObjectId("bd"),
"key" : 2,
"value" : 10,
"history" : ISODate("2020-05-16T00:00:00Z")
}
]
}
I hope it helps.

Problems aggregating MongoDB

I am having problems aggregating my Product Document in MongoDB.
My Product Document is:
{
"_id" : ObjectId("5d81171c2c69f45ef459e0af"),
"type" : "T-Shirt",
"name" : "Panda",
"description" : "Panda's are cool.",
"image" : ObjectId("5d81171c2c69f45ef459e0ad"),
"created_at" : ISODate("2019-09-17T18:25:48.026+01:00"),
"is_featured" : false,
"sizes" : [
"XS",
"S",
"M",
"L",
"XL"
],
"tags" : [ ],
"pricing" : {
"price" : 26,
"sale_price" : 8
},
"categories" : [
ObjectId("5d81171b2c69f45ef459e086"),
ObjectId("5d81171b2c69f45ef459e087")
],
"sku" : "5d81171c2c69f45ef459e0af"
},
And my Category Document is:
{
"_id" : ObjectId("5d81171b2c69f45ef459e087"),
"name" : "Art",
"description" : "These items are our artsy options.",
"created_at" : ISODate("2019-09-17T18:25:47.196+01:00")
},
My aim is to perform aggregation on the Product Document in order to count the number of items within each Category. So I have the Category "Art", I need to count the products are in the "Art" Category:
My current aggregate:
db.product.aggregate(
{ $unwind : "$categories" },
{
$group : {
"_id" : { "name" : "$name" },
"doc" : { $push : { "category" : "$categories" } },
}
},
{ $unwind : "$doc" },
{
$project : {
"_id" : 0,
"name" : "$name",
"category" : "$doc.category"
}
},
{
$group : {
"_id" : "$category",
"name": { "$first": "$name" },
"items_in_cat" : { $sum : 1 }
}
},
{ "$sort" : { "items_in_cat" : -1 } },
)
Which does actually work but not as I need:
{
"_id" : ObjectId("5d81171b2c69f45ef459e082"),
"name" : null, // Why is the name of the category no here?
"items_in_cat" : 4
},
As we can see the name is null. How can I aggregate the output to be:
{
"_id" : ObjectId("5d81171b2c69f45ef459e082"),
"name" : "Art",
"items_in_cat" : 4
},
We need to use $lookup to fetch the name from Category collection.
The following query can get us the expected output:
db.product.aggregate([
{
$unwind:"$categories"
},
{
$group:{
"_id":"$categories",
"items_in_cat":{
$sum:1
}
}
},
{
$lookup:{
"from":"category",
"let":{
"id":"$_id"
},
"pipeline":[
{
$match:{
$expr:{
$eq:["$_id","$$id"]
}
}
},
{
$project:{
"_id":0,
"name":1
}
}
],
"as":"categoryLookup"
}
},
{
$unwind:{
"path":"$categoryLookup",
"preserveNullAndEmptyArrays":true
}
},
{
$project:{
"_id":1,
"name":{
$ifNull:["$categoryLookup.name","NA"]
},
"items_in_cat":1
}
}
]).pretty()
Data set:
Collection: product
{
"_id" : ObjectId("5d81171c2c69f45ef459e0af"),
"type" : "T-Shirt",
"name" : "Panda",
"description" : "Panda's are cool.",
"image" : ObjectId("5d81171c2c69f45ef459e0ad"),
"created_at" : ISODate("2019-09-17T17:25:48.026Z"),
"is_featured" : false,
"sizes" : [
"XS",
"S",
"M",
"L",
"XL"
],
"tags" : [ ],
"pricing" : {
"price" : 26,
"sale_price" : 8
},
"categories" : [
ObjectId("5d81171b2c69f45ef459e086"),
ObjectId("5d81171b2c69f45ef459e087")
],
"sku" : "5d81171c2c69f45ef459e0af"
}
Collection: category
{
"_id" : ObjectId("5d81171b2c69f45ef459e086"),
"name" : "Art",
"description" : "These items are our artsy options.",
"created_at" : ISODate("2019-09-17T17:25:47.196Z")
}
{
"_id" : ObjectId("5d81171b2c69f45ef459e087"),
"name" : "Craft",
"description" : "These items are our artsy options.",
"created_at" : ISODate("2019-09-17T17:25:47.196Z")
}
Output:
{
"_id" : ObjectId("5d81171b2c69f45ef459e087"),
"items_in_cat" : 1,
"name" : "Craft"
}
{
"_id" : ObjectId("5d81171b2c69f45ef459e086"),
"items_in_cat" : 1,
"name" : "Art"
}

mongodb find the document by id and then group the result based on name field

I have a collection with multiple documents like
{
"_id" : ObjectId("5a64d076bfd103df081967ae"),
"status" : "",
"Number" : 53,
"values" : [
{
"date" : "2015-05-18",
"value" : 12.41
},
{
"date" : "2015-05-19",
"value" : 12.45
},
],
"Name" : "ABC Banking",
"scheme":"ABC1",
"createdDate" : "21-01-2018"
}
{
"_id" : ObjectId("5a64d076bfd103df081967ae"),
"status" : "",
"Number" : 53,
"values" : [
{
"date" : "2015-05-18",
"value" : 13.41
},
{
"date" : "2015-05-19",
"value" : 13.45
},
],
"Name" : "ABC Banking",
"scheme":"ABC2",
"createdDate" : "21-01-2018"
}
I am Querying collection based on Number field like
db.getCollection('mfhistories').find({'Number':53})
to get all the documents with this Number.
Now I want to group all the collection with Name 'ABC Banking' into an array. so that I will get result based on Name.
so the result should be like
{
"Name":"ABC Banking",
[
{
"_id" : ObjectId("5a64d076bfd103df081967ae"),
"status" : "",
"Number" : 53,
"values" : [
{
"date" : "2015-05-18",
"value" : 13.41
},
{
"date" : "2015-05-19",
"value" : 13.45
},
],
"scheme":"ABC1",
"createdDate" : "21-01-2018"
},
{
"_id" : ObjectId("5a64d076bfd103df081967ae"),
"status" : "",
"Number" : 53,
"values" : [
{
"date" : "2015-05-18",
"value" : 13.41
},
{
"date" : "2015-05-19",
"value" : 13.45
}
],
"scheme":"ABC2",
"createdDate" : "21-01-2018"
}
]
}
Please help..
Thanks,
J
You can use Aggregation Framework for that:
db.col.aggregate([
{
$match: { Number: 53, Name: "ABC Banking" }
},
{
$group: {
_id: "$Name",
docs: { $push: "$$ROOT" }
}
},
{
$project: {
Name: "$_id",
_id: 0,
docs: 1
}
}
])
$$ROOT is a special variable which captures entire document. More here.
db.mfhistories.aggregate(
// Pipeline
[
// Stage 1
{
$match: {
Number: 53
}
},
// Stage 2
{
$group: {
_id: {
Name: '$Name'
},
docObj: {
$addToSet: '$$CURRENT'
}
}
},
// Stage 3
{
$project: {
Name: '$_id.Name',
docObj: 1,
_id: 0
}
}
]
);

Mongodb : get whether a document is the latest with a field value and filter on the result

I am trying to port an existing SQL schema into Mongo.
We have document tables, with sometimes several times the same document, with a different revision but the same reference. I want to get only the latest revisions of the documents.
A sample input data:
{
"Uid" : "xxx",
"status" : "ACCEPTED",
"reference" : "DOC305",
"code" : "305-D",
"title" : "Document 305",
"creationdate" : ISODate("2011-11-24T15:13:28.887Z"),
"creator" : "X"
},
{
"Uid" : "xxx",
"status" : "COMMENTED",
"reference" : "DOC306",
"code" : "306-A",
"title" : "Document 306",
"creationdate" : ISODate("2011-11-28T07:23:18.807Z"),
"creator" : "X"
},
{
"Uid" : "xxx",
"status" : "COMMENTED",
"reference" : "DOC306",
"code" : "306-B",
"title" : "Document 306",
"creationdate" : ISODate("2011-11-28T07:26:49.447Z"),
"creator" : "X"
},
{
"Uid" : "xxx",
"status" : "ACCEPTED",
"reference" : "DOC501",
"code" : "501-A",
"title" : "Document 501",
"creationdate" : ISODate("2011-11-19T06:30:35.757Z"),
"creator" : "X"
},
{
"Uid" : "xxx",
"status" : "ACCEPTED",
"reference" : "DOC501",
"code" : "501-B",
"title" : "Document 501",
"creationdate" : ISODate("2011-11-19T06:40:32.957Z"),
"creator" : "X"
}
Given this data, I want this result set (sometimes I want only the last revision, sometimes I want all revisions with an attribute telling me whether it's the latest):
{
"Uid" : "xxx",
"status" : "ACCEPTED",
"reference" : "DOC305",
"code" : "305-D",
"title" : "Document 305",
"creationdate" : ISODate("2011-11-24T15:13:28.887Z"),
"creator" : "X",
"lastrev" : true
},
{
"Uid" : "xxx",
"status" : "COMMENTED",
"reference" : "DOC306",
"code" : "306-B",
"title" : "Document 306",
"creationdate" : ISODate("2011-11-28T07:26:49.447Z"),
"creator" : "X",
"lastrev" : true
},
{
"Uid" : "xxx",
"status" : "ACCEPTED",
"reference" : "DOC501",
"code" : "501-B",
"title" : "Document 501",
"creationdate" : ISODate("2011-11-19T06:40:32.957Z"),
"creator" : "X",
"lastrev" : true
}
I already have a bunch of filters, sorting, and skip/limit (for pagination of data), so the final result set should be mindful of these constraints.
The current "find" query (built with the .Net driver), which filters fine but gives me all revisions of each document:
coll.find(
{ "$and" : [
{ "$or" : [
{ "deletedid" : { "$exists" : false } },
{ "deletedid" : null }
] },
{ "$or" : [
{ "taskid" : { "$exists" : false } },
{ "taskid" : null }
] },
{ "objecttypeuid" : { "$in" : ["xxxxx"] } }
] },
{ "_id" : 0, "Uid" : 1, "lastrev" : 1, "title" : 1, "code" : 1, "creator" : 1, "owner" : 1, "modificator" : 1, "status" : 1, "reference": 1, "creationdate": 1 }
).sort({ "creationdate" : 1 }).skip(0).limit(10);
Using another question, I have been able to build this aggregation, which gives me the latest revision of each document, but with not enough attributes in the result:
coll.aggregate([
{ $sort: { "creationdate": 1 } },
{
$group: {
"_id": "$reference",
result: { $last: "$creationdate" },
creationdate: { $last: "$creationdate" }
}
}
]);
I would like to integrating the aggregate with the find query.
I have found the way to mix aggregation and filtering:
coll.aggregate(
[
{ $match: {
"$and" : [
{ "$or" : [
{ "deletedid" : { "$exists" : false } },
{ "deletedid" : null }
] },
{ "$or" : [
{ "taskid" : { "$exists" : false } },
{ "taskid" : null }
] },
{ "objecttypeuid" : { "$in" : ["xxx"] } }
]
}
},
{ $sort: { "creationdate": 1 } },
{ $group: {
"_id": "$reference",
"doc": { "$last": "$$ROOT" }
}
},
{ $sort: { "doc.creationdate": 1 } },
{ $skip: skip },
{ $limit: limit }
],
{ allowDiskUse: true }
);
For each result node, this gives me a "doc" node with the document data. It has too much data still (it's missing projections), but it's a start.
Translated in .Net:
FilterDefinitionBuilder<BsonDocument> filterBuilder = Builders<BsonDocument>.Filter;
FilterDefinition<BsonDocument> filters = filterBuilder.Empty;
filters = filters & (filterBuilder.Not(filterBuilder.Exists("deletedid")) | filterBuilder.Eq("deletedid", BsonNull.Value));
filters = filters & (filterBuilder.Not(filterBuilder.Exists("taskid")) | filterBuilder.Eq("taskid", BsonNull.Value));
foreach (var f in fieldFilters) {
filters = filters & filterBuilder.In(f.Key, f.Value);
}
var sort = Builders<BsonDocument>.Sort.Ascending(orderby);
var group = new BsonDocument {
{ "_id", "$reference" },
{ "doc", new BsonDocument("$last", "$$ROOT") }
};
var aggregate = coll.Aggregate(new AggregateOptions { AllowDiskUse = true })
.Match(filters)
.Sort(sort)
.Group(group)
.Sort(sort)
.Skip(skip)
.Limit(rows);
return aggregate.ToList();
I'm pretty sure there are better ways to do this, though.
You answer is pretty close. Instead of $last, $max is better.
About $last operator:
Returns the value that results from applying an expression to the last document in a group of documents that share the same group by a field. Only meaningful when documents are in a defined order.
Get the last revision in each group, see code below in mongo shell:
db.collection.aggregate([
{
$group: {
_id: '$reference',
doc: {
$max: {
"creationdate" : "$creationdate",
"code" : "$code",
"Uid" : "$Uid",
"status" : "$status",
"title" : "$title",
"creator" : "$creator"
}
}
}
},
{
$project: {
_id: 0,
Uid: "$doc.Uid",
status: "$doc.status",
reference: "$_id",
code: "$doc.code",
title: "$doc.title",
creationdate: "$doc.creationdate",
creator: "$doc.creator"
}
}
]).pretty()
The output as your expect:
{
"Uid" : "xxx",
"status" : "ACCEPTED",
"reference" : "DOC501",
"code" : "501-B",
"title" : "Document 501",
"creationdate" : ISODate("2011-11-19T06:40:32.957Z"),
"creator" : "X"
}
{
"Uid" : "xxx",
"status" : "COMMENTED",
"reference" : "DOC306",
"code" : "306-B",
"title" : "Document 306",
"creationdate" : ISODate("2011-11-28T07:26:49.447Z"),
"creator" : "X"
}
{
"Uid" : "xxx",
"status" : "ACCEPTED",
"reference" : "DOC305",
"code" : "305-D",
"title" : "Document 305",
"creationdate" : ISODate("2011-11-24T15:13:28.887Z"),
"creator" : "X"
}

Group by specific element of array with mongo aggregation framework

Is it possible to use the aggregation framework to group by a specific element of an array?
Such that with documents like this:
{
name: 'Russell',
favourite_foods: [
{ name: 'Pizza', type: 'Four Cheeses' },
{ name: 'Burger', type: 'Veggie'}
],
height: 6
}
I could get a distinct list of top favourite foods (ie. foods at index 0) along with the height of the tallest person who's top favourite food that is?
Something like this (although it doesn't work as the array index access dot notation doesn't seem to work in the aggregation framework):
db.people.aggregate([
{ $group : { _id: "$favourite_foods.0.name", max_height: { $max : "$height" } } }
])
Seems like you are relying on the favorite food for each person being first in the array. If so, there is an aggregation framework operator you can take advantage of.
Here is the pipeline you can use:
db.people.aggregate(
[
{
"$unwind" : "$favourite_foods"
},
{
"$group" : {
"_id" : {
"name" : "$name",
"height" : "$height"
},
"faveFood" : {
"$first" : "$favourite_foods"
}
}
},
{
"$group" : {
"_id" : "$faveFood.name",
"height" : {
"$max" : "$_id.height"
}
}
}
])
On this sample dataset:
> db.people.find().pretty()
{
"_id" : ObjectId("508894efd4197aa2b9490741"),
"name" : "Russell",
"favourite_foods" : [
{
"name" : "Pizza",
"type" : "Four Cheeses"
},
{
"name" : "Burger",
"type" : "Veggie"
}
],
"height" : 6
}
{
"_id" : ObjectId("5088950bd4197aa2b9490742"),
"name" : "Lucy",
"favourite_foods" : [
{
"name" : "Pasta",
"type" : "Four Cheeses"
},
{
"name" : "Burger",
"type" : "Veggie"
}
],
"height" : 5.5
}
{
"_id" : ObjectId("5088951dd4197aa2b9490743"),
"name" : "Landy",
"favourite_foods" : [
{
"name" : "Pizza",
"type" : "Four Cheeses"
},
{
"name" : "Pizza",
"type" : "Veggie"
}
],
"height" : 5
}
{
"_id" : ObjectId("50889541d4197aa2b9490744"),
"name" : "Augie",
"favourite_foods" : [
{
"name" : "Sushi",
"type" : "Four Cheeses"
},
{
"name" : "Pizza",
"type" : "Veggie"
}
],
"height" : 6.2
}
You get these results:
{
"result" : [
{
"_id" : "Pasta",
"height" : 5.5
},
{
"_id" : "Pizza",
"height" : 6
},
{
"_id" : "Sushi",
"height" : 6.2
}
],
"ok" : 1
}
Looks like it isn't currently possible to extract a specific element from an array in aggregation:
https://jira.mongodb.org/browse/SERVER-4589
JUST add more information about the result after using "$wind":
DOCUMENT :
> db.people.find().pretty()
{
"_id" : ObjectId("508894efd4197aa2b9490741"),
"name" : "Russell",
"favourite_foods" : [
{
"name" : "Pizza",
"type" : "Four Cheeses"
},
{
"name" : "Burger",
"type" : "Veggie"
}
],
"height" : 6
},
...
AGGREAGATION :
db.people.aggregate([{
$unwind: "$favourite_foods"
}]);
RESULT :
{
"_id" : ObjectId("508894efd4197aa2b9490741"),
"name" : "Russell",
"favourite_foods" :{
"name" : "Pizza",
"type" : "Four Cheeses"
},
"height" : 6
},
{
"_id" : ObjectId("508894efd4197aa2b9490741"),
"name" : "Russell",
"favourite_foods" : {
"name" : "Burger",
"type" : "Veggie"
},
"height" : 6
}
In Addition:
If there are more than two array fields in one collection record,
we can use "$project" stage to specify the array field.
db.people.aggregate([
{
$project:{
"favourite_foods": 1
}
},
{
$unwind: "$favourite_foods"
}
]);
I think you can make use of the $project and $unwind operators (let me know if this isn't what you're trying to accomplish):
> db.people.aggregate(
{$unwind: "$favourite_foods"},
{$project: {food : "$favourite_foods", height: 1}},
{$group : { _id: "$food", max_height: { $max : "$height" } } })
{
"result" : [
{
"_id" : {
"name" : "Burger",
"type" : "Veggie"
},
"max_height" : 6
},
{
"_id" : {
"name" : "Pizza",
"type" : "Four Cheeses"
},
"max_height" : 6
}
],
"ok" : 1
}
http://docs.mongodb.org/manual/applications/aggregation/
Since mongoDB version 3.2 You can simply use $arrayElemAt and $max:
db.collection.aggregate([
{
$set: {favourite_foods: {$arrayElemAt: ["$favourite_foods", 0]}}
},
{
$group: {
_id: "$favourite_foods.name",
maxHeight: {$max: "$height"}
}
}
])
Playground example