Here is my some sample data in collection sale
[
{group:2, item:a, qty:3 },
{group:2, item:b, qty:3 },
{group:2, item:b, qty:2 },
{group:1, item:a, qty:3 },
{group:1, item:a, qty:5 },
{group:1, item:b, qty:5 }
]
and I want to query data like below and sort the popular group to the top
[
{ group:1, items:[{name:'a',total_qty:8},{name:'b',total_qty:5} ],total_qty:13 },
{ group:2, items:[{name:'a',total_qty:3},{name:'b',total_qty:5} ],total_qty:8 },
]
Actually we can loop in server script( php, nodejs ...) but the problem is pagination. I cannot use skip to get the right result.
The following query can get us the expected output:
db.collection.aggregate([
{
$group:{
"_id":{
"group":"$group",
"item":"$item"
},
"group":{
$first:"$group"
},
"item":{
$first:"$item"
},
"total_qty":{
$sum:"$qty"
}
}
},
{
$group:{
"_id":"$group",
"group":{
$first:"$group"
},
"items":{
$push:{
"name":"$item",
"total_qty":"$total_qty"
}
},
"total_qty":{
$sum:"$total_qty"
}
}
},
{
$project:{
"_id":0
}
}
]).pretty()
Data set:
{
"_id" : ObjectId("5d84a37febcbd560107c54a7"),
"group" : 2,
"item" : "a",
"qty" : 3
}
{
"_id" : ObjectId("5d84a37febcbd560107c54a8"),
"group" : 2,
"item" : "b",
"qty" : 3
}
{
"_id" : ObjectId("5d84a37febcbd560107c54a9"),
"group" : 2,
"item" : "b",
"qty" : 2
}
{
"_id" : ObjectId("5d84a37febcbd560107c54aa"),
"group" : 1,
"item" : "a",
"qty" : 3
}
{
"_id" : ObjectId("5d84a37febcbd560107c54ab"),
"group" : 1,
"item" : "a",
"qty" : 5
}
{
"_id" : ObjectId("5d84a37febcbd560107c54ac"),
"group" : 1,
"item" : "b",
"qty" : 5
}
Output:
{
"group" : 2,
"items" : [
{
"name" : "b",
"total_qty" : 5
},
{
"name" : "a",
"total_qty" : 3
}
],
"total_qty" : 8
}
{
"group" : 1,
"items" : [
{
"name" : "b",
"total_qty" : 5
},
{
"name" : "a",
"total_qty" : 8
}
],
"total_qty" : 13
}
You need to use $group aggregation with $sum and $push accumulator
db.collection.aggregate([
{ "$group": {
"_id": "$group",
"items": { "$push": "$$ROOT" },
"total_qty": { "$sum": "$qty" }
}},
{ "$sort": { "total_qty": -1 }}
])
Related
I am watching a tutorial I can understand how this aggregate works, What is the use of pings, $$ROOT in it.
client = pymongo.MongoClient(MY_URL)
pings = client['mflix']['watching_pings']
cursor = pings.aggregate([
{
"$sample": { "size": 50000 }
},
{
"$addFields": {
"dayOfWeek": { "$dayOfWeek": "$ts" },
"hourOfDay": { "$hour": "$ts" }
}
},
{
"$group": { "_id": "$dayOfWeek", "pings": { "$push": "$$ROOT" } }
},
{
"$sort": { "_id": 1 }
}
]);
Let's assume that our collection looks like below:
{
"_id" : ObjectId("b9"),
"key" : 1,
"value" : 20,
"history" : ISODate("2020-05-16T00:00:00Z")
},
{
"_id" : ObjectId("ba"),
"key" : 1,
"value" : 10,
"history" : ISODate("2020-05-13T00:00:00Z")
},
{
"_id" : ObjectId("bb"),
"key" : 3,
"value" : 50,
"history" : ISODate("2020-05-12T00:00:00Z")
},
{
"_id" : ObjectId("bc"),
"key" : 2,
"value" : 0,
"history" : ISODate("2020-05-13T00:00:00Z")
},
{
"_id" : ObjectId("bd"),
"key" : 2,
"value" : 10,
"history" : ISODate("2020-05-16T00:00:00Z")
}
Now based on the history field you want to group and insert the whole documents in to an array field 'items'. Here $$ROOT variable will be helpful.
So, the aggregation query to achieve the above will be:
db.collection.aggregate([{
$group: {
_id: '$history',
items: {$push: '$$ROOT'}
}
}])
It will result in following output:
{
"_id" : ISODate("2020-05-12T00:00:00Z"),
"items" : [
{
"_id" : ObjectId("bb"),
"key" : 3,
"value" : 50,
"history" : ISODate("2020-05-12T00:00:00Z")
}
]
},
{
"_id" : ISODate("2020-05-13T00:00:00Z"),
"items" : [
{
"_id" : ObjectId("ba"),
"key" : 1,
"value" : 10,
"history" : ISODate("2020-05-13T00:00:00Z")
},
{
"_id" : ObjectId("bc"),
"key" : 2,
"value" : 0,
"history" : ISODate("2020-05-13T00:00:00Z")
}
]
},
{
"_id" : ISODate("2020-05-16T00:00:00Z"),
"items" : [
{
"_id" : ObjectId("b9"),
"key" : 1,
"value" : 20,
"history" : ISODate("2020-05-16T00:00:00Z")
},
{
"_id" : ObjectId("bd"),
"key" : 2,
"value" : 10,
"history" : ISODate("2020-05-16T00:00:00Z")
}
]
}
I hope it helps.
this is my data :
> db.bookmarks.find({"userId" : "56b9b74bf976ab70ff6b9999"}).pretty()
{
"_id" : ObjectId("56c2210fee4a33579f4202dd"),
"userId" : "56b9b74bf976ab70ff6b9999",
"items" : [
{
"itemId" : "28",
"timestamp" : "2016-02-12T18:07:28Z"
},
{
"itemId" : "29",
"timestamp" : "2016-02-12T18:07:29Z"
},
{
"itemId" : "30",
"timestamp" : "2016-02-12T18:07:30Z"
},
{
"itemId" : "31",
"timestamp" : "2016-02-12T18:07:31Z"
},
{
"itemId" : "32",
"timestamp" : "2016-02-12T18:07:32Z"
},
{
"itemId" : "33",
"timestamp" : "2016-02-12T18:07:33Z"
},
{
"itemId" : "34",
"timestamp" : "2016-02-12T18:07:34Z"
}
]
}
I want to have something like (actually i hope the _id can become userId too) :
{
"_id" : "56b9b74bf976ab70ff6b9999",
"items" : [
{ "itemId": "32", "timestamp": "2016-02-12T18:07:32Z" },
{ "itemId": "31", "timestamp": "2016-02-12T18:07:31Z" },
{ "itemId": "30", "timestamp": "2016-02-12T18:07:30Z" }
]
}
What I have now :
> db.bookmarks.aggregate(
... { $match: { "userId" : "56b9b74bf976ab70ff6b9999" } },
... { $unwind: '$items' },
... { $sort: { 'items.timestamp': -1} },
... { $skip: 2 },
... { $limit: 3},
... { $group: { '_id': '$userId' , items: { $push: '$items.itemId' } } }
... ).pretty()
{ "_id" : "56b9b74bf976ab70ff6b9999", "items" : [ "32", "31", "30" ] }
i tried to read the document in mongo and find out i can $push, but somehow i cannot find a way to push such object, which is not defined anywhere in the whole object. I want to have the timestamp also.. but i don't know how should i modified the $group (or others??) to do so. thanks for helping!
This code, which I tested in the MongoDB 3.2.1 shell, should give you the output format that you want:
> db.bookmarks.aggregate(
{ "$match" : { "userId" : "Ursula" } },
{ "$unwind" : "$items" },
{ "$sort" : { "items.timestamp" : -1 } },
{ "$skip" : 2 },
{ "$limit" : 3 },
{ "$group" : { "_id" : "$userId", items: { "$push" : { "myPlace" : "$items.itemId", "myStamp" : "$items.timestamp" } } } } ).pretty()
Running the above will produce this output:
{
"_id" : "Ursula",
"items" : [
{
"myPlace" : "52",
"myStamp" : ISODate("2016-02-13T18:07:32Z")
},
{
"myPlace" : "51",
"myStamp" : ISODate("2016-02-13T18:07:31Z")
},
{
"myPlace" : "50",
"myStamp" : ISODate("2016-02-13T18:07:30Z")
}
]
}
In MongoDB version 3.2.x, you can also use the $out operator in the very last stage of the aggregation pipeline, and have the output of the aggregation query written to a collection. Here is the code I used:
> db.bookmarks.aggregate(
{ "$match" : { "userId" : "Ursula" } },
{ "$unwind" : "$items" },
{ "$sort" : { "items.timestamp" : -1 } },
{ "$skip" : 2 },
{ "$limit" : 3 },
{ "$group" : { "_id" : "$userId", items: { "$push" : { "myPlace" : "$items.itemId", "myStamp" : "$items.timestamp" } } } },
{ "$out" : "ursula" } )
This gives me a collection named "ursula":
> show collections
ursula
and I can query that collection:
> db.ursula.find().pretty()
{
"_id" : "Ursula",
"items" : [
{
"myPlace" : "52",
"myStamp" : ISODate("2016-02-13T18:07:32Z")
},
{
"myPlace" : "51",
"myStamp" : ISODate("2016-02-13T18:07:31Z")
},
{
"myPlace" : "50",
"myStamp" : ISODate("2016-02-13T18:07:30Z")
}
]
}
>
Last of all, this is the input document I used in the aggregation query. You can compare this document to how I coded the aggregation query to see how I built the new items array.
> db.bookmarks.find( { "userId" : "Ursula" } ).pretty()
{
"_id" : ObjectId("56c240ed55f2f6004dc3b25c"),
"userId" : "Ursula",
"items" : [
{
"itemId" : "48",
"timestamp" : ISODate("2016-02-13T18:07:28Z")
},
{
"itemId" : "49",
"timestamp" : ISODate("2016-02-13T18:07:29Z")
},
{
"itemId" : "50",
"timestamp" : ISODate("2016-02-13T18:07:30Z")
},
{
"itemId" : "51",
"timestamp" : ISODate("2016-02-13T18:07:31Z")
},
{
"itemId" : "52",
"timestamp" : ISODate("2016-02-13T18:07:32Z")
},
{
"itemId" : "53",
"timestamp" : ISODate("2016-02-13T18:07:33Z")
},
{
"itemId" : "54",
"timestamp" : ISODate("2016-02-13T18:07:34Z")
}
]
}
i have a document like this :
{
"ExtraFields" : [
{
"value" : "print",
"fieldID" : ObjectId("5535627631efa0843554b0ea")
},
{
"value" : "14",
"fieldID" : ObjectId("5535627631efa0843554b0eb")
},
{
"value" : "POLYE",
"fieldID" : ObjectId("5535627631efa0843554b0ec")
},
{
"value" : "30",
"fieldID" : ObjectId("5535627631efa0843554b0ed")
},
{
"value" : "0",
"fieldID" : ObjectId("5535627631efa0843554b0ee")
},
{
"value" : "0",
"fieldID" : ObjectId("5535627731efa0843554b0ef")
},
{
"value" : "0",
"fieldID" : ObjectId("5535627831efa0843554b0f0")
},
{
"value" : "42",
"fieldID" : ObjectId("5535627831efa0843554b0f1")
},
{
"value" : "30",
"fieldID" : ObjectId("5535627831efa0843554b0f2")
},
{
"value" : "14",
"fieldID" : ObjectId("5535627831efa0843554b0f3")
},
{
"value" : "19",
"fieldID" : ObjectId("5535627831efa0843554b0f4")
}
],
"id" : ObjectId("55369e60733e4914550832d0"), "title" : "A product"
}
what i want is to match one or more sets from the ExtraFields array. For example, all the products that contain the values print and 30. Since a value may be found in more than one fieldID (like 0 or true) we need to create a set like
WHERE (fieldID : ObjectId("5535627631efa0843554b0ea"), value : "print")
Where i'm having problems is when querying more than one fields. The pipeline i came up with is :
db.products.aggregate([
{'$unwind': '$ExtraFields'},
{
'$match': {
'$and': [{
'$and': [{'ExtraFields.value': {'$in': ["A52A2A"]}}, {
'ExtraFields.fieldID': ObjectId("5535627631efa0843554b0ea")
}]
}
,
{
'$and': [{'ExtraFields.value': '14'}, {'ExtraFields.fieldID': ObjectId("5535627631efa0843554b0eb")}]
}
]
}
},
]);
This returns zero results, but this is what i want to do in theory. Match all items that contain set 1 AND all that contain set 2.
The end result should look like a faceted search output :
[
{
"_id" : {
"values" : "18",
"fieldID" : ObjectId("5535627831efa0843554b0f3")
},
"count" : 2
},
{
"_id" : {
"values" : "33",
"fieldID" : ObjectId("5535627831efa0843554b0f2")
},
"count" : 1
}
]
Any ideas?
You could try the following aggregation pipeline
db.products.aggregate([
{
"$match": {
"ExtraFields.value": { "$in": ["A52A2A", "14"] },
"ExtraFields.fieldID": {
"$in": [
ObjectId("5535627631efa0843554b0ea"),
ObjectId("5535627631efa0843554b0eb")
]
}
}
},
{
"$unwind": "$ExtraFields"
},
{
"$match": {
"ExtraFields.value": { "$in": ["A52A2A", "14"] },
"ExtraFields.fieldID": {
"$in": [
ObjectId("5535627631efa0843554b0ea"),
ObjectId("5535627631efa0843554b0eb")
]
}
}
},
{
"$group": {
"_id": {
"value": "$ExtraFields.value",
"fieldID": "$ExtraFields.fieldID"
},
"count": {
"$sum": 1
}
}
}
])
With the sample document provided, this gives the output:
/* 1 */
{
"result" : [
{
"_id" : {
"value" : "14",
"fieldID" : ObjectId("5535627631efa0843554b0eb")
},
"count" : 1
}
],
"ok" : 1
}
I want a map reduce function to draw the below output from the below input collection which satisfy the below condition.
Input collection:
[{
a:1,
b:'test',
indices:[1,2,4,5]
}, {
a:2,
b:'test',
indices:[2, 3, 5]
}, {
a:2,
b:'test',
indices:[1, 2, 4]
}, {
a:3,
b:'apple',
indices:[1, 2]
}, {
a:4,
b:'apple',
indices:[1, 3, 5]
}, {
a:5,
b:'orange',
indices:[232]
}, {
a:5,
b:'dummy',
indices:[2]
}, {
a:6,
b:'dummy',
indices:[11, 2, 4]
}, {
a:6,
b:'dummy',
indices:[11, 3, 2]
}, {
a:6,
b:'dummy',
indices:[1, 2, 3, 4, 5]
}]
Conditions are:
select only which has indices array has 2. This can be send as
query. i.e, query:{indices:{$in:2}}
Group by b
If there are duplicates a, then it should be considered as 1 eg: Document having a=2 are present in two times satisfying the condition indices
has 2.
My input collection always satisfies the condition of if a
prsents in "test", it will not present in dummy/apple/etc. but a
can be duplicate.
Here is what I tried:
db.x.mapReduce(function(){
emit(this.b, 1);
}, function(key, reducable){
return Array.sum(reducable);
}, {
out: {inline: 1},
query:{
'indices':{$in:2}
}
});
Output:
[
{
"_id" : test",
"value" : {
"count" : 3 -> It should be 2
}
},{
"_id" : apple",
"value" : {
"count" : 2
}
},{
"_id" : dummy",
"value" : {
"count" : 4 -> It should be 2
}
}]
Expected Output:
[{
"_id" : test",
"value" : {
"count" : 2
}
},{
"_id" : apple",
"value" : {
"count" : 2
}
},{
"_id" : dummy",
"value" : {
"count" : 2
}
}]
No need for map/reduce. Use aggregation:
> db.crawler_status.aggregate([
{ "$match" : { "indices" : 2 } },
{ "$group" : { "_id" : { "b" : "$b", "a" : "$a" } } },
{ "$group" : { "_id" : "$_id.b", "count" : { "$sum" : 1 } } }
])
{ "_id" : "test", "count" : 2 }
{ "_id" : "apple", "count" : 1 } // your sample output was mistaken
{ "_id" : "dummy", "count" : 2 }
I have an item collection with following documents.
{ "item" : "i1", "category" : "c1", "brand" : "b1" }
{ "item" : "i2", "category" : "c2", "brand" : "b1" }
{ "item" : "i3", "category" : "c1", "brand" : "b2" }
{ "item" : "i4", "category" : "c2", "brand" : "b1" }
{ "item" : "i5", "category" : "c1", "brand" : "b2" }
I want to separate aggregation results --> count by category, count by brand. Please note, it is not count by (category,brand)
I am able to do this using map-reduce using following code.
map = function(){
emit({type:"category",category:this.category},1);
emit({type:"brand",brand:this.brand},1);
}
reduce = function(key, values){
return Array.sum(values)
}
db.item.mapReduce(map,reduce,{out:{inline:1}})
And the result is
{
"results" : [
{
"_id" : {
"type" : "brand",
"brand" : "b1"
},
"value" : 3
},
{
"_id" : {
"type" : "brand",
"brand" : "b2"
},
"value" : 2
},
{
"_id" : {
"type" : "category",
"category" : "c1"
},
"value" : 3
},
{
"_id" : {
"type" : "category",
"category" : "c2"
},
"value" : 2
}
],
"timeMillis" : 21,
"counts" : {
"input" : 5,
"emit" : 10,
"reduce" : 4,
"output" : 4
},
"ok" : 1,
}
I can get same results by firing two different aggregation commands as below.
db.item.aggregate({$group:{_id:"$category",count:{$sum:1}}})
db.item.aggregate({$group:{_id:"$brand",count:{$sum:1}}})
Is there anyway I can do the same using aggregation framework by single aggregation command.
I have simplified my case here, but in actual I need this grouping from fields in array of subdocuments. Assume the above is structure after I do unwind.
It is a real-time query (someone waiting for response), though on smaller dataset, so execution time is important.
I am using MongoDB 2.4.
Starting in Mongo 3.4, the $facet aggregation stage greatly simplifies this type of use case by processing multiple aggregation pipelines within a single stage on the same set of input documents:
// { "item" : "i1", "category" : "c1", "brand" : "b1" }
// { "item" : "i2", "category" : "c2", "brand" : "b1" }
// { "item" : "i3", "category" : "c1", "brand" : "b2" }
// { "item" : "i4", "category" : "c2", "brand" : "b1" }
// { "item" : "i5", "category" : "c1", "brand" : "b2" }
db.collection.aggregate(
{ $facet: {
categories: [{ $group: { _id: "$category", count: { "$sum": 1 } } }],
brands: [{ $group: { _id: "$brand", count: { "$sum": 1 } } }]
}}
)
// {
// "categories" : [
// { "_id" : "c1", "count" : 3 },
// { "_id" : "c2", "count" : 2 }
// ],
// "brands" : [
// { "_id" : "b1", "count" : 3 },
// { "_id" : "b2", "count" : 2 }
// ]
// }
Over a large data set I would say that your current mapReduce approach would be the best one, because the aggregation technique for this would not work well with large data. But possibly over a reasonably small size it might just be what you need:
db.items.aggregate([
{ "$group": {
"_id": null,
"categories": { "$push": "$category" },
"brands": { "$push": "$brand" }
}},
{ "$project": {
"_id": {
"categories": "$categories",
"brands": "$brands"
},
"categories": 1
}},
{ "$unwind": "$categories" },
{ "$group": {
"_id": {
"brands": "$_id.brands",
"category": "$categories"
},
"count": { "$sum": 1 }
}},
{ "$group": {
"_id": "$_id.brands",
"categories": { "$push": {
"category": "$_id.category",
"count": "$count"
}},
}},
{ "$project": {
"_id": "$categories",
"brands": "$_id"
}},
{ "$unwind": "$brands" },
{ "$group": {
"_id": {
"categories": "$_id",
"brand": "$brands"
},
"count": { "$sum": 1 }
}},
{ "$group": {
"_id": null,
"categories": { "$first": "$_id.categories" },
"brands": { "$push": {
"brand": "$_id.brand",
"count": "$count"
}}
}}
])
Not really the same as the mapReduce output, you could throw in some more stages to change the output format, but this should be usable:
{
"_id" : null,
"categories" : [
{
"category" : "c2",
"count" : 2
},
{
"category" : "c1",
"count" : 3
}
],
"brands" : [
{
"brand" : "b2",
"count" : 2
},
{
"brand" : "b1",
"count" : 3
}
]
}
As you can see, this involves a fair bit of shuffling between arrays in order to group each set of either "category" or "brand" within the same pipeline process. Again I will say, this will not do well for large data, but for something like "items in an order" it would probably do nicely.
Of course as you say, you have simplified somewhat, so the first grouping key on null is either going to be something else or either narrowed down to do that null case by an earlier $match stage, which is probably what you want to do.