Get matched embedded document(s) from array - mongodb

I've got a lot of documents using the following structure in MongoDB:
{
"_id" : ObjectId("..."),
"plant" : "XY_4711",
"hour" : 1473321600,
"units" : [
{
"_id" : ObjectId("..."),
"unit_id" : 10951,
"values" : [
{
"quarter" : 1473321600,
"value" : 395,
},
{
"quarter" : 1473322500,
"value" : 402,
},
{
"quarter" : 1473323400,
"value" : 406,
},
{
"quarter" : 1473324300,
"value" : 410,
}
]
}
]
}
Now I need to find all embedded document values where the quarter is between some given timestamps (e.g. { $gte: 1473324300, $lte: 1473328800 }).
I've only got the unit_id and the quarter from/to timestamps for filtering the documents, and I only need the quarter and value, grouped and ordered by unit.
I'm new to MongoDB and have read something about find() and aggregate(), but I don't know how to do it. MongoDB 3.0 is installed on the server.
Finally, I've got it:
I simply have to take each array apart, filter out the things I don't need, and put it back together:
db.collection.aggregate([
{$match : {$and : [{"units.values.quarter" : {$gte : 1473324300}}, {"units.values.quarter" : {$lte : 1473328800 }}]}},
{$unwind: "$units"},
{$unwind: "$units.values"},
{$match : {$and : [{"units.values.quarter" : {$gte : 1473324300}}, {"units.values.quarter" : {$lte : 1473328800 }}]}},
{$project: {"units": {values: {quarter: 1, "value": 1}, unit_id: 1}}},
{$group: {"_id": "$units.unit_id", "quarter_values": {$push: "$units.values"}}} ,
{$sort: {"_id": 1}}
])
Will give:
{
"_id" : 10951,
"quarter_values" : [
{
"quarter" : 1473324300,
"value" : 410
},
{
"quarter" : 1473325200,
"value" : 412
},
{
"quarter" : 1473326100,
"value" : 412
},
{
"quarter" : 1473327000,
"value" : 411
},
{
"quarter" : 1473327900,
"value" : 408
},
{
"quarter" : 1473328800,
"value" : 403
}
]
}
See: Return only matched sub-document elements within a nested array for a detailed description!
I think I have to switch to $map or $filter in the future. Thanks to notionquest for supporting my questions :)

Please see the sample query below. I didn't fully understand your grouping requirement. However, you should be able to adapt this sample query to get your desired output.
db.collection.aggregate([
{$unwind : {path : "$units"}},
{$match : {$and : [{"units.values.quarter" : {$gte : 1473324300}}, {"units.values.quarter" : {$lte : 1473328800 }}]}},
{$project : {"units" : {values : {quarter : 1, "value" : 1}, unit_id : 1}}},
{$group : { _id : "$units.unit_id", quarter_values : { $push :{ quarter : "$units.values.quarter", value : "$units.values.value"}}}},
{$sort : {_id : 1 }}
]);
Sample output:-
{
"_id" : 10951,
"quarter_values" : [
{
"quarter" : [
1473321600,
1473322500,
1473323400,
1473324300
],
"value" : [
395,
402,
406,
410
]
}
]
}

Related

Aggregating Data in MongoDB based on properties values

I'm using MongoDB 3.4 and I need to create a query filter to fetch data from my mongo collection (ProductionEventsCollection).
Some values are calculated "on the fly".
As these records are event based and future values depend on past values, if a value is changed in the past it affects the future sums.
I have a collection with the properties shown below:
[{
_id: "5bfc2a16b4f11f3760ed4b64",
piece_id: "12345",
finish_date: "2018-11-26T17:15:09.795Z",
total_produced: 500
},
{
_id: "5bfc2a16b4f11f3760ed4b65",
piece_id: "12345",
finish_date: "2018-11-27T17:15:09.795Z",
total_produced: 750
},
{
_id: "5bfc2a16b4f11f3760ed4b66",
piece_id: "12345",
finish_date: "2018-11-28T17:15:09.795Z",
total_produced: 250
}]
The idea is to get a collection like this:
[{
_id: "5bfc2a16b4f11f3760ed4b64",
piece_id: "12345",
finish_date: "2018-11-26T17:15:09.795Z",
previous_value: 0,
total_produced: 500,
new_value: 500
},
{
_id: "5bfc2a16b4f11f3760ed4b65",
piece_id: "12345",
finish_date: "2018-11-27T17:15:09.795Z",
previous_value: 500,
total_produced: 750,
new_value: 1250
},
{
_id: "5bfc2a16b4f11f3760ed4b66",
piece_id: "12345",
finish_date: "2018-11-28T17:15:09.795Z",
previous_value: 1250,
total_produced: 250,
new_value: 1500
}]
Based on the finish_date property, I should be able to calculate the previous_value sums until that date, and the new_value will be the previous calculated sum plus the total_produced.
Previous value
previous_value = SUM(past total_produced) until finish_date
New Value
new_value = previous_value + total_produced
Based on these collection values I need to return a json array, because I'll allow users to download a spreadsheet.
You can try the aggregation below to get the result; the logic is to use $reduce to calculate the running total.
aggregation pipeline
db.t32.aggregate([
{$group : {_id : "$piece_id", data : {$push : "$$ROOT"}}},
{$addFields : {data :
{$reduce : {
input : "$data",
initialValue : [{previous_value : 0, total_produced : 0, new_value : 0}],
in : {$concatArrays :
[ "$$value",[{$mergeObjects : ["$$this", { previous_value : {$arrayElemAt : ["$$value.new_value", -1]} , total_produced : "$$this.total_produced", new_value : {$sum : ["$$this.total_produced",{$arrayElemAt : ["$$value.new_value", -1]}]}}]}]]
}
}}
}},
{$addFields : {data : {$slice : ["$data", 1, {$size : "$data"}]}}}
]).pretty()
sample collection
> db.t32.find()
{ "_id" : "5bfc2a16b4f11f3760ed4b64", "piece_id" : "12345", "finish_date" : "2018-11-26T17:15:09.795Z", "total_produced" : 500 }
{ "_id" : "5bfc2a16b4f11f3760ed4b65", "piece_id" : "12345", "finish_date" : "2018-11-27T17:15:09.795Z", "total_produced" : 750 }
{ "_id" : "5bfc2a16b4f11f3760ed4b66", "piece_id" : "12345", "finish_date" : "2018-11-28T17:15:09.795Z", "total_produced" : 250 }
aggregation result
> db.t32.aggregate([
... {$group : {_id : "$piece_id", data : {$push : "$$ROOT"}}},
... {$addFields : {data :
... {$reduce : {
... input : "$data",
... initialValue : [{previous_value : 0, total_produced : 0, new_value : 0}],
... in : {$concatArrays :
... [ "$$value",[{$mergeObjects : ["$$this", { previous_value : {$arrayElemAt : ["$$value.new_value", -1]} , total_produced : "$$this.total_produced", new_value : {$sum : ["$$this.total_produced",{$arrayElemAt : ["$$value.new_value", -1]}]}}]}]]
... }
... }}
... }},
... {$addFields : {data : {$slice : ["$data", 1,1000]}}}
... ]).pretty()
{
"_id" : "12345",
"data" : [
{
"_id" : "5bfc2a16b4f11f3760ed4b64",
"piece_id" : "12345",
"finish_date" : "2018-11-26T17:15:09.795Z",
"total_produced" : 500,
"previous_value" : 0,
"new_value" : 500
},
{
"_id" : "5bfc2a16b4f11f3760ed4b65",
"piece_id" : "12345",
"finish_date" : "2018-11-27T17:15:09.795Z",
"total_produced" : 750,
"previous_value" : 500,
"new_value" : 1250
},
{
"_id" : "5bfc2a16b4f11f3760ed4b66",
"piece_id" : "12345",
"finish_date" : "2018-11-28T17:15:09.795Z",
"total_produced" : 250,
"previous_value" : 1250,
"new_value" : 1500
}
]
}
>
I am not sure if such an aggregation is even possible in mongodb.
This sounds like typical business logic for the server side, and therefore I would suggest fetching the relevant data to the server and performing all the computation there. That way things are much less complex and without any performance loss.

Mongodb - create ranking of values from field array

I am trying to sort the result of a MongoDB aggregation, but I don't know what is happening. I searched for solutions on Stack Overflow, but they didn't work and I don't know why...
My idea is to return a ranking of values from the array field (tags). I managed to list the sum of values, but I cannot sort it...
This is the query that I came up with, and it seems to work:
db.getCollection("metadata").aggregate(
{$unwind: '$tags'},
{$group: {_id:'$tags.name', total: {$sum: 1}}}
);
because I receive this result, which makes sense:
{
"_id" : "kite",
"total" : 1.0
}
{
"_id" : "piggy bank",
"total" : 1.0
}
{
"_id" : "sorrel",
"total" : 1.0
}
{
"_id" : "eggnog",
"total" : 4.0
}
{
"_id" : "Weimaraner",
"total" : 1.0
}
{
"_id" : "bassinet",
"total" : 15.0
}
{
"_id" : "squirrel monkey",
"total" : 1.0
}
{
"_id" : "bath towel",
"total" : 6.0
}
TRIES
When I tried something like this:
db.getCollection("metadata").aggregate(
{$unwind: '$tags'},
{$group: {_id:'$tags.name', total: {$sum: 1}}},
{$sort: {total: -1}}
);
RESULT TRY:
{
"_id" : "baboon",
"total" : 12.0
}
{
"_id" : "snow leopard",
"total" : 4.0
}
{
"_id" : "green lizard",
"total" : 5.0
}
{
"_id" : "Dandie Dinmont",
"total" : 7.0
}
{
"_id" : "echidna",
"total" : 8.0
}
{
"_id" : "bee eater",
"total" : 6.0
}
or like this:
db.getCollection("metadata").aggregate(
{$unwind: '$tags'},
{$group: {_id: { name:'$tags.name', total: {$sum: 1}}}},
{$sort: {total: -1}}
);
The result is either not sorted, or the values are not summed at all...
EXTRA
This is the query if I want to list all the entries with the array:
db.getCollection('metadata').find({tags: {$exists: true}})
And the result is:
/* 2 */
{
"_id" : ObjectId("5900af3ff6844d2f7519fe13"),
"user_id" : 23,
"company_id" : 1,
"created" : ISODate("2017-04-26T14:31:27.000Z"),
"md5file" : "fdd30b1ca52e1c15f330f46c0079498c",
"path" : "/storage/emulated/0/DCIM/Camera/IMG_20160605_133703.jpg",
"image_width" : 3456,
"image_height" : 4608,
"originalTags" : [
{
"name" : "sleeping bag",
"percentage" : 0.7529412
},
{
"name" : "diaper",
"percentage" : 0.05490196
},
{
"name" : "bib",
"percentage" : 0.039215688
}
],
"tags" : [
{
"name" : "sleeping bag",
"percentage" : 0.7529412
}
]
}
/* 3 */
{
"_id" : ObjectId("5900af3ff6844d2f7519fe14"),
"user_id" : 23,
"company_id" : 1,
"created" : ISODate("2017-04-26T14:31:27.000Z"),
"md5file" : "22612c8bc99d1031146f7c9918555572",
"path" : "/storage/emulated/0/DCIM/Camera/IMG_20160605_164243.jpg",
"image_width" : 4608,
"image_height" : 3456,
"originalTags" : [
{
"name" : "bath towel",
"percentage" : 0.62352943
},
{
"name" : "quilt",
"percentage" : 0.101960786
},
{
"name" : "cradle",
"percentage" : 0.043137256
}
],
"tags" : [
{
"name" : "bath towel",
"percentage" : 0.62352943
}
]
}
The aggregation pipeline is an array. It should be wrapped in square brackets []:
db.getCollection("metadata").aggregate(
[
{$unwind: '$tags'},
{$group: {_id:'$tags.name', total: {$sum: 1}}},
{$sort: {total: -1}}
]
);

MongoDB: $mod operator in aggregation pipeline

I have a restaurants collection that contains 3772 documents, and I am trying to calculate the total number of documents whose first element of the grades array has a score that's a multiple of 7, using the aggregation framework.
Query:
db.restaurants.aggregate([
{$project: {remainder: {$mod: ["$grades.0.score", 7]},
restaurant_id: 1,
name: 1,
grades: 1
}
},
{$match: {remainder: {$eq: 0}}},
{$group: {_id: null, total: {$sum: 1}}}
])
However, I am getting an error message that's caused by the use of the $mod operator in the $project pipeline stage. The error message is the following:
$mod only supports numeric types, not Array and NumberDouble
However, both $grades.0.score and 7 are integers, right? What should I change to make this query work as intended?
Example document:
{
"_id" : ObjectId("57290430139a4a37132c9e93"),
"address" : {
"building" : "469",
"coord" : [
-73.961704,
40.662942
],
"street" : "Flatbush Avenue",
"zipcode" : "11225"
},
"borough" : "Brooklyn",
"cuisine" : "Hamburgers",
"grades" : [
{
"date" : ISODate("2014-12-30T00:00:00Z"),
"grade" : "A",
"score" : 8
},
{
"date" : ISODate("2014-07-01T00:00:00Z"),
"grade" : "B",
"score" : 23
},
{
"date" : ISODate("2013-04-30T00:00:00Z"),
"grade" : "A",
"score" : 12
},
],
"name" : "Wendy'S",
"restaurant_id" : "30112340"
}
Instead of $grades.0.score
put $grades[0].score
in your query.
Edit: the above is wrong; see the correct form below. As you want to filter by documents whose first grade score is a multiple of 7, your aggregation should start like this:
db.restaurants.aggregate([{$match: {"grades.0.score": {$mod: [7, 0]}}},{$group: {_id: null, total: {$sum: 1}}}])
I changed grades.0.score to 7 and ran the command to check whether it works; it seems to work as you wanted.
> db.restaurants.find().pretty();
{
"_id" : 0,
"address" : {
"building" : "469",
"coord" : [
-73.961704,
40.662942
],
"street" : "Flatbush Avenue",
"zipcode" : "11225"
},
"borough" : "Brooklyn",
"cuisine" : "Hamburgers",
"grades" : [
{
"date" : ISODate("2014-12-30T00:00:00Z"),
"grade" : "A",
"score" : 7
},
{
"date" : ISODate("2014-07-01T00:00:00Z"),
"grade" : "B",
"score" : 23
},
{
"date" : ISODate("2013-04-30T00:00:00Z"),
"grade" : "A",
"score" : 12
}
],
"name" : "Wendy'S",
"restaurant_id" : "30112340"
}
> db.restaurants.aggregate([{$match: {"grades.0.score": {$mod: [7, 0]}}},{$group:{_id:null,count:{$sum:1}}} ])
{ "_id" : null, "count" : 1 }
First: why doesn't it work? Try:
db.restaurants.aggregate([
{$project: {
score0: "$grades.0.score",
restaurant_id: 1,
name: 1
}
}
])
You'll see that score0 returns [0 elements], so the expression does output an array, hence the error message.
Based on this other question Get first element in array and return using Aggregate? (Mongodb), here is a solution to your problem:
db.restaurants.aggregate([
{$unwind: "$grades"},
{$group:{"_id":"$_id","grade0":{$first:"$grades"}}},
{$project: {
remainder: {$mod: ["$grade0.score", 7]},
restaurant_id: 1,
name: 1,
grade0: 1,
}
},
{$match: {remainder: {$eq: 0}}},
{$group: {_id: null, total: {$sum: 1}}}
])

Complex query of MongoDB

I have looked up the manual of MongoDB, but cannot find a proper solution to do the following search:
Document structure:
{
"_id" : ObjectId("57201082e92c07baef62452a"),
"user_id" : 1,
"profile" : [
{
"date" : ISODate("2012-10-11T12:58:06.000Z"),
"content" : [
{
"feature_id" : 1,
"feature_value" : true
}
]
},
{
"date" : ISODate("2013-04-12T08:23:09.000Z"),
"content" : [
{
"feature_id" : 1,
"feature_value" : false
}
]
}
]
}
What I want to get is the LATEST feature_value of a given user_id and a given feature_id.
Is there any way to accomplish this using MongoDB?
Thanks.
Assuming "latest" is determined by profile.date, you can use the following to get the desired result (query not tested, hope it works).
db.test.aggregate(
[
{$match : {"user_id" : 1}},
{$unwind : "$profile"},
{$unwind : "$profile.content"},
{$match : {"profile.content.feature_id" : 1}},
{$sort : {"profile.date" : -1}},
{$limit : 1},
{$group:{_id : "$_id", content : {$push:"$profile.content"}}}
])
Please try this:
db.test.aggregate(
{$match:{"user_id" :1,"profile.content.feature_id":1}},
{$project:{"profile.content.feature_value":1}},
{$sort:{"profile.date" :-1}},
{$limit : 1});

Exact Reduce function

I have a collection of the type :
{
"_id" : ObjectId("51f1fcc08188d3117c6da351"),
"cust_id" : "abc123",
"ord_date" : ISODate("2012-10-03T18:30:00Z"),
"status" : "A",
"price" : 25,
"items" : [{
"sku" : "ggg",
"qty" : 7,
"price" : 2.5
}, {
"sku" : "ppp",
"qty" : 5,
"price" : 2.5
}]
}
I want to fetch only the "items" objects where "items.qty" > 5 and "items.sku" == "ggg".
I applied Map reduce:
cmd { "mapreduce" : "orders" , "map" : "function map(){var items_out={items:[]};for(i in this.items){items_out.items.push(this.items[i].sku);};emit(this._id,[items_out]);}" , "reduce" : "function reduce(key,values){return {'result':values};}" , "verbose" : true , "out" : { "replace" : "map_reduce"} , "query" : { "$where" : "return this.items.some(function(entry){return entry.qty>5})&&this.items.some(function(entry){return entry.sku=='ggg'})"}},
but I am getting all the sku values, something like this:
{ "data": [ { "items": [ "ggg", "ppp" ] } ]}
whereas it should give only ggg, as this is the only value matching the criteria.
Use the following command:
db.orders.aggregate(
{$unwind : "$items"},
{$match : {"items.qty": {$gt: 5 }}},
{$match : {"items.sku" : "ggg"}},
{$project : {_id:0, items:1}}
)