Mongodb - create ranking of values from field array - mongodb

I am trying to sort the results of a MongoDB aggregation, but I don't understand what is happening. I searched for solutions on Stack Overflow, but none of them worked and I don't know why...
My idea is to return a ranking of the values in an array field (tags). I managed to list the count for each value, but I cannot sort that list...
This is the query I came up with, and it seems to work:
db.getCollection("metadata").aggregate(
{$unwind: '$tags'},
{$group: {_id:'$tags.name', total: {$sum: 1}}}
);
I say that because the result I receive makes sense:
{
"_id" : "kite",
"total" : 1.0
}
{
"_id" : "piggy bank",
"total" : 1.0
}
{
"_id" : "sorrel",
"total" : 1.0
}
{
"_id" : "eggnog",
"total" : 4.0
}
{
"_id" : "Weimaraner",
"total" : 1.0
}
{
"_id" : "bassinet",
"total" : 15.0
}
{
"_id" : "squirrel monkey",
"total" : 1.0
}
{
"_id" : "bath towel",
"total" : 6.0
}
TRIES
When I tried something like this:
db.getCollection("metadata").aggregate(
{$unwind: '$tags'},
{$group: {_id:'$tags.name', total: {$sum: 1}}},
{$sort: {total: -1}}
);
RESULT TRY:
{
"_id" : "baboon",
"total" : 12.0
}
{
"_id" : "snow leopard",
"total" : 4.0
}
{
"_id" : "green lizard",
"total" : 5.0
}
{
"_id" : "Dandie Dinmont",
"total" : 7.0
}
{
"_id" : "echidna",
"total" : 8.0
}
{
"_id" : "bee eater",
"total" : 6.0
}
or like this:
db.getCollection("metadata").aggregate(
{$unwind: '$tags'},
{$group: {_id: { name:'$tags.name', total: {$sum: 1}}}},
{$sort: {total: -1}}
);
The result is either not sorted, or the values are not summed at all...
EXTRA
This is the query if I want to list all the entries with the array:
db.getCollection('metadata').find({tags: {$exists: true}})
And the result is:
/* 2 */
{
"_id" : ObjectId("5900af3ff6844d2f7519fe13"),
"user_id" : 23,
"company_id" : 1,
"created" : ISODate("2017-04-26T14:31:27.000Z"),
"md5file" : "fdd30b1ca52e1c15f330f46c0079498c",
"path" : "/storage/emulated/0/DCIM/Camera/IMG_20160605_133703.jpg",
"image_width" : 3456,
"image_height" : 4608,
"originalTags" : [
{
"name" : "sleeping bag",
"percentage" : 0.7529412
},
{
"name" : "diaper",
"percentage" : 0.05490196
},
{
"name" : "bib",
"percentage" : 0.039215688
}
],
"tags" : [
{
"name" : "sleeping bag",
"percentage" : 0.7529412
}
]
}
/* 3 */
{
"_id" : ObjectId("5900af3ff6844d2f7519fe14"),
"user_id" : 23,
"company_id" : 1,
"created" : ISODate("2017-04-26T14:31:27.000Z"),
"md5file" : "22612c8bc99d1031146f7c9918555572",
"path" : "/storage/emulated/0/DCIM/Camera/IMG_20160605_164243.jpg",
"image_width" : 4608,
"image_height" : 3456,
"originalTags" : [
{
"name" : "bath towel",
"percentage" : 0.62352943
},
{
"name" : "quilt",
"percentage" : 0.101960786
},
{
"name" : "cradle",
"percentage" : 0.043137256
}
],
"tags" : [
{
"name" : "bath towel",
"percentage" : 0.62352943
}
]
}

Aggregation pipeline is an array. It should be wrapped in square brackets []:
db.getCollection("metadata").aggregate(
[
{$unwind: '$tags'},
{$group: {_id:'$tags.name', total: {$sum: 1}}},
{$sort: {total: -1}}
]
);

Related

Get matched embedded document(s) from array

I've got a lot of documents using the following structure in MongoDB:
{
"_id" : ObjectId("..."),
"plant" : "XY_4711",
"hour" : 1473321600,
"units" : [
{
"_id" : ObjectId("..."),
"unit_id" : 10951,
"values" : [
{
"quarter" : 1473321600,
"value" : 395,
},
{
"quarter" : 1473322500,
"value" : 402,
},
{
"quarter" : 1473323400,
"value" : 406,
},
{
"quarter" : 1473324300,
"value" : 410,
}
]
}
]
}
Now I need to find all embedded document values where the quarter is between some given timestamps (eg: { $gte: 1473324300, $lte: 1473328800 }).
I've only got the unit_id and the quarter timestamp from/to for filtering the documents. And I only need the quarter and value grouped and ordered by unit.
I'm new to MongoDB and have read something about find() and aggregate(), but I don't know how to do it. MongoDB 3.0 is installed on the server.
Finally I've got it:
I simply have to take apart each array, filtering out the things I don't need and put it back together:
db.collection.aggregate([
{$match : {$and : [{"units.values.quarter" : {$gte : 1473324300}}, {"units.values.quarter" : {$lte : 1473328800 }}]}},
{$unwind: "$units"},
{$unwind: "$units.values"},
{$match : {$and : [{"units.values.quarter" : {$gte : 1473324300}}, {"units.values.quarter" : {$lte : 1473328800 }}]}},
{$project: {"units": {values: {quarter: 1, "value": 1}, unit_id: 1}}},
{$group: {"_id": "$units.unit_id", "quarter_values": {$push: "$units.values"}}} ,
{$sort: {"_id": 1}}
])
Will give:
{
"_id" : 10951,
"quarter_values" : [
{
"quarter" : 1473324300,
"value" : 410
},
{
"quarter" : 1473325200,
"value" : 412
},
{
"quarter" : 1473326100,
"value" : 412
},
{
"quarter" : 1473327000,
"value" : 411
},
{
"quarter" : 1473327900,
"value" : 408
},
{
"quarter" : 1473328800,
"value" : 403
}
]
}
See: Return only matched sub-document elements within a nested array for a detailed description!
I think I have to switch to $map or $filter in the future. Thanks to notionquest for supporting my questions :)
Please see the sample query below. I didn't exactly get your grouping requirement. However, with this sample query you should be able to change and get your desired output.
db.collection.aggregate([
{$unwind : {path : "$units"}},
{$match : {$and : [{"units.values.quarter" : {$gte : 1473324300}}, {"units.values.quarter" : {$lte : 1473328800 }}]}},
{$project : {"units" : {values : {quarter : 1, "value" : 1}, unit_id : 1}}},
{$group : { _id : "$units.unit_id", quarter_values : { $push :{ quarter : "$units.values.quarter", value : "$units.values.value"}}}},
{$sort : {_id : 1 }}
]);
Sample output:-
{
"_id" : 10951,
"quarter_values" : [
{
"quarter" : [
1473321600,
1473322500,
1473323400,
1473324300
],
"value" : [
395,
402,
406,
410
]
}
]
}

Mongodb aggregation: how to use unwind->group->project multiple times

I have an orders collection where I need to calculate some sums from multiple sub-arrays, but I can't figure out how to lose the multiplied items that the double unwind creates.
db.Orders.aggregate(
{$unwind: "$items"},
{$unwind: "$shipping"},
{$group: {
_id: {
year: { '$year': '$createdAt' },
month: { '$month': '$createdAt' },
day: { '$dayOfMonth': '$createdAt' }
},
mainItems: { $addToSet: '$items' },
totalSales: {$sum: {
$multiply: ["$items.quantity", "$items.variants.price"]
}},
averageSales: {$avg: {$multiply: ["$items.quantity", "$items.variants.price"]}},
/* this will not sum the individual orders because the unwind
* created multiple documents per order */
ordersPlaced: {$sum: 1},
itemsPurchased: {$sum: "$items.quantity"},
totalRefundAmount: {$sum: 0},
chargedForShipping: {$sum: "$shipping.shipmentMethod.rate"}
}}
)
If I take the shipping out of the unwind and the group, the query returns the correct values except for chargedForShipping (0, since shipping is then no longer unwound) and ordersPlaced, which will still be more than expected (but I also need the shipping information, and even more fields that I left out here for easier understanding).
Sample data:
[{
"_id" : "xK29ZHxGcYvgWgx5p",
"sessionId" : "yw7e9G7uBzYTy9Grq",
"userId" : "fZREMm2DmsnMosMKj",
"shopId" : "oiqQDnuBwabj44q2o",
"billing" : [
{
"shopId" : "oiqQDnuBwabj44q2o",
"_id" : "9TMJj9w65MmAkgg27",
"paymentMethod" : {
"amount" : 22.45,
"status" : "settled",
"mode" : "capture",
"transactionId" : "AP",
"createdAt" : ISODate("2016-02-15T13:44:35.116Z"),
"transactions" : [
{ type:"refund", amount:5}
]
}
},
{
"shopId" : "9YfkXWyCci8fN43Pj",
"_id" : "RwW8xMnFzQqdTpqtg",
"paymentMethod" : {
"createdAt" : ISODate("2016-02-15T13:44:35.116Z")
}
},
{
"shopId" : "SgXWPKGJkxBw6qsbT",
"_id" : "ASizt6BtkxpCxgEJn",
"paymentMethod" : {
"createdAt" : ISODate("2016-02-15T13:44:35.116Z")
}
}
],
"shipping" : [
{
"_id" : "yXb5T5zLuxPYmgoT5",
"shipmentMethod" : {
"name" : "Continental US",
"_id" : "womiJX2QZBFQQWFur",
"rate" : 9.949999999999999,
"shopId" : "9YfkXWyCci8fN43Pj",
},
"items" : [
{
"_id" : "48s9bmDfrRMqnkije",
"productId" : "KXtF5xqERWJsXk2yP",
"shopId" : "SgXWPKGJkxBw6qsbT",
"variantId" : "YQDHuyPHbhx4wruZx",
"quantity" : 1
}
],
"packed" : false,
"shipped" : false,
}
],
"items" : [
{
"_id" : "hhuiGFTBkLACLpPjQ",
"shopId" : "9YfkXWyCci8fN43Pj",
"productId" : "sDYNXMrnRJiyQ8gex",
"quantity" : 1,
"variants" : {
"_id" : "muJi6Bqnq2CD8B7AR",
"price" : 2.5,
"title" : "egy",
"weight" : 23,
},
"type" : "simple",
},
{
"_id" : "48s9bmDfrRMqnkije",
"shopId" : "SgXWPKGJkxBw6qsbT",
"productId" : "KXtF5xqERWJsXk2yP",
"quantity" : 1,
"variants" : {
"_id" : "YQDHuyPHbhx4wruZx",
"title" : "Bogi varinat title",
"price" : 10,
"type" : "variant",
"compareAtPrice" : 100000,
"weight" : 100,
},
"type" : "simple",
}
],
"email" : "test#user.com",
"createdAt" : ISODate("2016-02-15T13:44:35.091Z"),
"updatedAt" : ISODate("2016-02-15T14:24:55.174Z")
}]
What I need is orderTotal per month, shippingTotal per month, totalRefunded per month and average sales per month. The issue is that the first comes from the items sub-array, the second from the shipping sub-array and the third from the billing sub-array, which is why I have issues with the unwind.

MongoDB: Sort in combination with Aggregation group

I have a collection called transaction with below documents,
/* 0 */
{
"_id" : ObjectId("5603fad216e90d53d6795131"),
"statusId" : "65c719e6727d",
"relatedWith" : "65c719e67267",
"status" : "A",
"userId" : "100",
"createdTs" : ISODate("2015-09-24T13:15:36.609Z")
}
/* 1 */
{
"_id" : ObjectId("5603fad216e90d53d6795134"),
"statusId" : "65c719e6727d",
"relatedWith" : "65c719e6726d",
"status" : "B",
"userId" : "100",
"createdTs" : ISODate("2015-09-24T13:14:31.609Z")
}
/* 2 */
{
"_id" : ObjectId("5603fad216e90d53d679512e"),
"statusId" : "65c719e6727d",
"relatedWith" : "65c719e6726d",
"status" : "C",
"userId" : "100",
"createdTs" : ISODate("2015-09-24T13:13:36.609Z")
}
/* 3 */
{
"_id" : ObjectId("5603fad216e90d53d6795132"),
"statusId" : "65c719e6727d",
"relatedWith" : "65c719e6726d",
"status" : "D",
"userId" : "100",
"createdTs" : ISODate("2015-09-24T13:16:36.609Z")
}
When I run the below Aggregation query without $group,
db.transaction.aggregate([
{
"$match": {
"userId": "100",
"statusId": "65c719e6727d"
}
},
{
"$sort": {
"createdTs": -1
}
}
])
I get the result in the expected sort order, i.e. sorted by createdTs in descending order (minimal result):
/* 0 */
{
"result" : [
{
"_id" : ObjectId("5603fad216e90d53d6795132"),
"createdTs" : ISODate("2015-09-24T13:16:36.609Z")
},
{
"_id" : ObjectId("5603fad216e90d53d6795131"),
"createdTs" : ISODate("2015-09-24T13:15:36.609Z")
},
{
"_id" : ObjectId("5603fad216e90d53d6795134"),
"createdTs" : ISODate("2015-09-24T13:14:31.609Z")
},
{
"_id" : ObjectId("5603fad216e90d53d679512e"),
"createdTs" : ISODate("2015-09-24T13:13:36.609Z")
}
],
"ok" : 1
}
If I apply the aggregation below with $group, the result is sorted inversely (i.e. in ascending order):
db.transaction.aggregate([
{
"$match": {
"userId": "100",
"statusId": "65c719e6727d"
}
},
{
"$sort": {
"createdTs": -1
}
},
{
$group: {
"_id": {
"statusId": "$statusId",
"relatedWith": "$relatedWith",
"status": "$status"
},
"status": {$first: "$status"},
"statusId": {$first: "$statusId"},
"relatedWith": {$first: "$relatedWith"},
"createdTs": {$first: "$createdTs"}
}
}
]);
I get the result in inverse order, i.e. sorted by createdTs in ascending order:
/* 0 */
{
"result" : [
{
"_id" : ObjectId("5603fad216e90d53d679512e"),
"createdTs" : ISODate("2015-09-24T13:13:36.609Z")
},
{
"_id" : ObjectId("5603fad216e90d53d6795134"),
"createdTs" : ISODate("2015-09-24T13:14:31.609Z")
},
{
"_id" : ObjectId("5603fad216e90d53d6795131"),
"createdTs" : ISODate("2015-09-24T13:15:36.609Z")
},
{
"_id" : ObjectId("5603fad216e90d53d6795132"),
"createdTs" : ISODate("2015-09-24T13:16:36.609Z")
}
],
"ok" : 1
}
Where am I wrong ?
The $group stage does not guarantee any ordering of its results. See the first paragraph here.
If you want the results to be sorted after a $group, you need to add a $sort after the $group stage.
In your case, you should move the $sort after the $group. And before you ask: no, the $sort won't be able to use an index after the $group like it does before the $group :-).
The internal algorithm of $group seems to keep some sort of ordering (reversed apparently), but I would not count on that and add a $sort.
You are not doing anything wrong here; it's a $group behavior in MongoDB.
Let's have a look at this example.
Suppose you have the following documents in a collection:
{ "_id" : 1, "item" : "abc", "price" : 10, "quantity" : 2, "date" : ISODate("2014-01-01T08:00:00Z") }
{ "_id" : 2, "item" : "jkl", "price" : 20, "quantity" : 1, "date" : ISODate("2014-02-03T09:00:00Z") }
{ "_id" : 3, "item" : "xyz", "price" : 5, "quantity" : 5, "date" : ISODate("2014-02-03T09:05:00Z") }
{ "_id" : 4, "item" : "abc", "price" : 10, "quantity" : 10, "date" : ISODate("2014-02-15T08:00:00Z") }
{ "_id" : 5, "item" : "xyz", "price" : 5, "quantity" : 10, "date" : ISODate("2014-02-15T09:05:00Z") }
{ "_id" : 6, "item" : "xyz", "price" : 5, "quantity" : 5, "date" : ISODate("2014-02-15T12:05:10Z") }
{ "_id" : 7, "item" : "xyz", "price" : 5, "quantity" : 10, "date" : ISODate("2014-02-15T14:12:12Z") }
Now if you run this
db.collection.aggregate([{ $sort: { item: 1,date:1}} ] )
the output will be in ascending order of item and date.
Now if you add a $group stage to the aggregation pipeline, the order comes back reversed.
db.collection.aggregate([{ $sort: { item: 1,date:1}},{$group:{_id:"$item"}} ] )
Output will be
{ "_id" : "xyz" }
{ "_id" : "jkl" }
{ "_id" : "abc" }
Now the solution for your problem
change "createdTs": -1 to "createdTs": 1 for group

MongoDB aggregate $match and $group with $sum

I have a collection with documents like this:
{
"Company" : "4433",
"Descripcion" : "trabajo",
"Referencia" : "11817",
"HoraImportado" : "15:54",
"ImportedOd" : "2014-05-20T13:54:28.493Z",
"Items" : [],
"Notes" : [
{
"_id" : ObjectId("537b5ea4c61b1d1743f43420"),
"NoteDateTime" : "2014-05-20T13:54:44.418Z",
"Description" : "nota",
"IsForTechnician" : true,
"Username" : "admin"
},
{
"_id" : ObjectId("537c4a549e956f77ab8c7c38"),
"NoteDateTime" : ISODate("2014-05-21T06:40:20.299Z"),
"Description" : "ok",
"IsForTechnician" : true,
"Username" : "admin"
}
],
"OrderState" : "Review",
"SiniestroDe" : "Emergencia",
"Technicians" : [
{
"TechnicianId" : ObjectId("53465f9d519c94680327965d"),
"Name" : "Administrator",
"AssignedOn" : ISODate("2014-05-20T13:54:44.373Z"),
"RemovedOn" : null
}
],
"TechniciansHistory" : [
{
"TechnicianId" : ObjectId("53465f9d519c94680327965d"),
"Name" : "Administrator",
"AssignedOn" : ISODate("2014-05-20T13:54:44.373Z"),
"RemovedOn" : null
},
{
"Name" : "Nuevo",
"AssignedOn" : ISODate("2014-05-20T13:54:44.373Z"),
"RemovedOn" : null,
"TechnicianId" : ObjectId("5383577a994be8b9a9e3f01e")
}
],
"Telefonos" : "615554006",
"_id" : ObjectId("537b5ea4c61b1d1743f4341f"),
"works" : [
{
"code" : "A001",
"name" : "Cambiar bombilla",
"orderId" : "537b5ea4c61b1d1743f4341f",
"price" : "11",
"ID" : 33,
"lazyLoaded" : true,
"status" : 0,
"Date" : ISODate("2014-05-21T06:40:20.299Z"),
"TechnicianId" : "53465f9d519c94680327965d",
"_id" : ObjectId("537c4a549e956f77ab8c7c39")
},
{
"code" : "A001",
"name" : "Cambiar bombilla",
"orderId" : "537b5ea4c61b1d1743f4341f",
"price" : "11",
"ID" : 34,
"lazyLoaded" : true,
"status" : 0,
"Date" : ISODate("2014-05-21T06:40:20.299Z"),
"TechnicianId" : "53465f9d519c94680327965d",
"_id" : ObjectId("537c4a549e956f77ab8c7c3a")
}
]
}
Now I want to get the works for a selected array of TechnicianIds, group them by TechnicianId and get the sum of works.price for each technician.
I tried this:
db.orders.aggregate([
{ $match: { 'works.TechnicianId': {$in:['53465f9d519c94680327965d']}}},
{ $group: { _id: "$works.TechnicianId",total:{$sum:'$works.price'}}},
])
And this is the result:
{
"result" : [
{
"_id" : [
"53465f9d519c94680327965d",
"53465f9d519c94680327965d"
],
"total" : 0
}
],
"ok" : 1
}
The total is the $sum, but it is 0 when it should be 44.
Try adding $unwind:
db.orders.aggregate([
{ $match: { 'works.TechnicianId': {$in:['53465f9d519c94680327965d']}}},
{ $unwind: "$works" },
{ $group: { _id: "$works.TechnicianId",total:{$sum:'$works.price'}}},
])
Look here for more info : http://docs.mongodb.org/manual/reference/operator/aggregation/unwind/
The price value is a string. $sum only operates on Numbers.
I've checked this by running the following:
db.foo.insert({"cost": "1"})
db.foo.insert({"cost": "2"})
db.foo.insert({"cost": "3"})
db.foo.insert({"cost": 4})
db.foo.insert({"cost": 5})
db.foo.aggregate([{$group: {_id: null, cost: {$sum: "$cost"}}}])
{ "result" : [ { "_id" : null, "cost" : 9 } ], "ok" : 1 }
According to this answer, you can't cast values in normal Mongo queries, so you can't change the string to a number inline.
You should either update all values to a Number datatype or use map-reduce. I'd go for the former.
If the value is a string to prevent floating point errors, consider multiplying by 100 to store the value in cents: "10.50" --> 1050
As Lalit Agarwal indicated, you'll also need to unwind the array of works. Example of what happens if you don't:
db.bar.insert({"works": [{price: 10}]})
db.bar.insert({"works": [{price: 20}, {price: 30}]})
db.bar.insert({"works": [{price: 40}, {price: 50}]})
db.bar.aggregate([
{$group: {_id: null, total: {$sum: "$works.price"} }}
])
{ "result" : [ { "_id" : null, "total" : 0 } ], "ok" : 1 }
db.bar.aggregate([
{$unwind: "$works"},
{$group: {_id: null, total: {$sum: "$works.price"} }}
])
{ "result" : [ { "_id" : null, "total" : 150 } ], "ok" : 1 }
What $unwind does is make 5 documents out of the initial 3, all with a single value in the works field. It then groups and sums them.
db.inventory.insert(
{
item: "ABC1",
details: {
model: "14Q3",
manufacturer: "XYZ Company"
},
stock: [ { size: "S", qty: 25 }, { size: "M", qty: 50 } ],
category: "clothing"
}
)

Does $sort work for sub-array documents?

I have a collection which has an array field. I want to sort on a field of the documents inside that sub-array, but Mongo is not sorting the data.
My collection is:
{
"_id" : ObjectId("51f1fcc08188d3117c6da351"),
"cust_id" : "abc123",
"ord_date" : ISODate("2012-10-03T18:30:00Z"),
"status" : "A",
"price" : 25,
"items" : [{
"sku" : "ggg",
"qty" : 7,
"price" : 2.5
}, {
"sku" : "ppp",
"qty" : 5,
"price" : 2.5
}]
}
My Query is:
db.orders.aggregate([
{ "$unwind" : "$items"} ,
{ "$match" : { }} ,
{ "$group" : { "items" : { "$addToSet" : { "sku" : "$items.sku"}} , "_id" : { }}} ,
{ "$sort" : { "items.sku" : 1}} ,
{ "$project" : { "_id" : 0 , "items" : 1}}
])
Result is:
"result" : [
{
"items" : [
{
"sku" : "ppp"
},
{
"sku" : "ggg"
}
]
}
],
"ok" : 1
}
Whereas "sku":"ggg" should come first when the sort is ascending.
You want to do the sort BEFORE you regroup:
db.orders.aggregate([
{ "$unwind" : "$items"} ,
{ "$sort" : { "items.sku" : 1}},
{ "$match" : { }} ,
{ "$group" : { "items" : { "$push" : { "sku" : "$items.sku"}} , "_id" : null}} ,
{ "$project" : { "_id" : 0 , "items" : 1}}
])