Mongo - Aggregate Group Lookup Values

I'm trying to sum up values that are listed within a lookup. I have 2 collections:
Subscriptions (Pseudocode)
{
_id,
purchaseDate: Date,
wholesalePrice: Number,
purchasePrice: Number,
retailPrice: Number,
userID: String,
}
Payments
{
_id,
statementMonth: String, // e.g. 2020-08 (same as grouped id in Subscriptions)
paymentDate: Date,
userID: String,
amountPaid: String // stored as a string, e.g. "40"
}
Users will need to transfer the profit-margin value at the end of each month, so I want to create output for Statements. These will have all Subscriptions and Payments grouped into monthly records, with a summary of the data within them. I have managed to build everything up to the first $group; however, once I do my $lookup to get the Payment details, everything seems to fail.
This is my current pipeline:
{
"$match": {
"userID": [provided UserID]
}
},
{
"$group": {
"_id": {
"$dateToString": {
"format": "%Y-%m",
"date": "$purchaseDate"
}
},
"totalWholesalePrice": {
"$sum": "$wholesalePrice"
},
"totalPurchasePrice": {
"$sum": "$purchasePrice"
},
"count": {
"$sum": 1.0
}
}
},
{
"$addFields": {
"totalAmountDue": {
"$subtract": [
"$totalPurchasePrice",
"$totalWholesalePrice"
]
}
}
},
{
"$lookup": {
"from": "transactions",
"localField": "_id",
"foreignField": "statementMonth",
"as": "transactions"
}
},
{
"$unwind": {
"path": "$transactions",
"preserveNullAndEmptyArrays": true
}
},
{
"$sort": {
"_id": -1.0
}
}
This returns 2 records when there are 2 transactions:
{
"_id" : "2020-08",
"totalWholesalePrice" : NumberInt(89),
"totalPurchasePrice" : 135.55,
"count" : 8.0,
"totalAmountDue" : 46.55,
"transactions" : {
"_id" : ObjectId("5f3faf2216d7a517bc51bfae"),
"date" : ISODate("2020-04-20T11:23:40.284+0000"),
"statementMonth" : "2020-08",
"merchant" : "M1268360",
"amountPaid" : "40"
}
}
{
"_id" : "2020-08",
"totalWholesalePrice" : NumberInt(89),
"totalPurchasePrice" : 135.55,
"count" : 8.0,
"totalAmountDue" : 46.55,
"transactions" : {
"_id" : ObjectId("5f3fc13f16d7a517bc51c047"),
"date" : ISODate("2020-04-20T11:23:40.284+0000"),
"statementMonth" : "2020-08",
"merchant" : "M1268360",
"amountPaid" : "2"
}
}
I would like the final JSON to be:
{
"_id" : "2020-08",
"totalWholesalePrice" : NumberInt(89),
"totalPurchasePrice" : 135.55,
"count" : 8.0,
"totalAmountDue" : 46.55,
"transactions" : [{
"_id" : ObjectId("5f3faf2216d7a517bc51bfae"),
"date" : ISODate("2020-04-20T11:23:40.284+0000"),
"statementMonth" : "2020-08",
"merchant" : "M1268360",
"amountPaid" : "40"
},
{
"_id" : ObjectId("5f3fc13f16d7a517bc51c047"),
"date" : ISODate("2020-04-20T11:23:40.284+0000"),
"statementMonth" : "2020-08",
"merchant" : "M1268360",
"amountPaid" : "2"
}],
"totalPaid" : 42,
"totalBalance" : 4.55,
}

You need to add one more pipeline stage:
db.collection.aggregate([
{
$group: {
"_id": "$id",
"transactions": { //Grouping transactions together
"$addToSet": "$transactions"
},
"totalWholesalePrice": {//As all values are unique getting the first
"$first": "$totalWholesalePrice"
}
// Similarly add the other needed fields using $first / $addToSet
}
}
])
To get the total:
db.collection.aggregate([
{
$group: {
"_id": "$id",
"transactions": {
"$addToSet": "$transactions"
},
"totalWholesalePrice": {
"$first": "$totalWholesalePrice"
},
"totalAmountPaid": {
$sum: {
$toInt: "$transactions.amountPaid" //you stored as string, convert to appropriate type
}
}
}
}
])
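As an aside, since $lookup already returns an array, you can also skip the $unwind/regroup round trip entirely and compute the totals straight off the looked-up array. A minimal sketch, assuming MongoDB 4.0+ for $toInt and reusing your stages up to and including the $lookup:
// ... $match, $group, $addFields and $lookup stages as in your pipeline ...
{
  "$addFields": {
    "totalPaid": {
      "$sum": {
        "$map": {
          "input": "$transactions",
          "as": "t",
          "in": { "$toInt": "$$t.amountPaid" } // amountPaid is stored as a string
        }
      }
    }
  }
},
{
  "$addFields": {
    "totalBalance": { "$subtract": ["$totalAmountDue", "$totalPaid"] }
  }
},
{
  "$sort": { "_id": -1 }
}
With your sample data this yields totalPaid: 42 and totalBalance: 4.55, matching the desired output.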

Related

mongodb: match, group by multiple fields, project and count

So I'm learning MongoDB and I have a collection of writers to train on.
I'm trying to count works grouped by the country and gender of the author. This is what I've accomplished so far:
db.writers.aggregate([
{ "$match": { "gender": {"$ne": male}}},
{ "$group": {
"_id": {
"country_id": "$country_id",
"type": "$type"
},
}},
{ "$group": {
"_id": "$_id.country_id",
"literary_work": {
"$push": {
"type": "$_id.type",
"count": { "$sum": "$type" }
}
},
"total": { "$sum": "$type" }
}},
{ "$sort": { "country_id": 1 } },
{ "$project": {
"literary_work": { "$slice": [ "$literary_work", 3 ] },
"total": { "$sum": "$type" }
}}
])
Sadly, the output that I get is not the one I'm expecting:
"_id" : GREAT BRITAIN,
"literary_work" : [
{
"type" : "POEM",
"count" : 0
},
{
"type" : "NOVEL",
"count" : 0
},
{
"type" : "SHORT STORY",
"count" : 0
}
],
"total" : 0
Could anyone tell me where to insert the count stage, or what my mistake is?
Update:
Data sample:
{
"_id" : ObjectId("5f115c5d5f62f9f482cd7a49"),
"author" : George Sand,
"gender" : female,
"country_id" : FRANCE,
"title": "Consuelo",
"type" : "NOVEL",
}
Expected result (NB! this is a result for both genders):
{
"_id" : FRANCE,
"count" : 59.0,
"literary_work" : [
{
"type" : "POEM",
"count" : 14.0
},
{
"type" : "NOVEL",
"count" : 34.0
},
{
"type" : "SHORT STORY",
"count" : 11.0
}
]
}
Your implementation is on the right track, but a few things are missing:
the count is missing in the first $group
the total count of literary_work can then be summed from that first-group count
and the $project stage is not needed in your query
Corrected query:
db.writers.aggregate([
{
$match: {
gender: { $ne: "male" }
}
},
{
$group: {
_id: {
country_id: "$country_id",
type: "$type"
},
// missed this
count: { $sum: 1 }
}
},
{
$group: {
_id: "$_id.country_id",
// this total is summed from the first group's counts
count: { $sum: "$count" },
literary_work: {
$push: {
type: "$_id.type",
// push the per-type count computed in the first group
count: "$count"
}
}
}
},
// corrected from country_id to _id
{
$sort: { "_id": 1 }
}
])
Working Playground: https://mongoplayground.net/p/JWP7qdDY6cc

Combine results based on condition during group by

MongoDB query generated from Java code:
{
"pipeline": [{
"$match": {
"Id": "09cd9a5a-85c5-4948-808b-20a52d92381a"
}
},
{
"$group": {
"_id": "$result",
"id": {
"$first": "$result"
},
"labelKey": {
"$first": {
"$ifNull": ["$result",
"$result"]
}
},
"value": {
"$sum": 1
}
}
}]
}
Field 'result' can have values like Approved, Rejected, null, and "" (empty string). What I am trying to achieve is combining the counts of null and empty together,
so that the empty-string id carries the count of both null and "", which here is equal to 4.
I'm sure there's a more "proper" way, but this is what I could quickly come up with:
[
{
"$group" : {
"_id" : "$result",
"id" : {
"$first" : "$result"
},
"labelKey" : {
"$first" : {
"$ifNull" : [
"$result",
"$result"
]
}
},
"value" : {
"$sum" : 1.0
}
}
},
{
"$group" : {
"_id" : {
"$cond" : [{
$or: [
{"$eq": ["$_id", "Approved"]},
{"$eq": ["$_id", "Rejected"]},
]}},
"$_id",
""
]
},
"temp" : {
"$push" : {
"_id" : "$_id",
"labelKey" : "$labelKey"
}
},
"count" : {
"$sum" : "$value"
}
}
},
{
"$unwind" : "$temp"
},
{
"$project" : {
"_id" : "$temp._id",
"labelKey": "$temp.labelKey",
"count" : "$count"
}
}
]
Since the second $group only ever runs on four documents at most, I don't feel too bad about doing this.
I have used $facet.
The $facet stage lets you run several independent sub-pipelines within a single stage of a pipeline, all over the same input documents. This means you can run several aggregations that share the same preliminary and subsequent stages.
var queries = [{
"$match": {
"Id": "09cd9a5a-85c5-4948-808b-20a52d92381a"
}
},{
$facet: {
"empty": [
{
$match : {
result : { $in : ['',null]}
}
},{
"$group" : {
"_id" : null,
value : { $sum : 1}
}
}
],
"non_empty": [
{
$match : {
result : { $nin : ['',null]}
}
},{
"$group" : {
"_id" : '$result',
value : { $sum : 1}
}
}
]
}
},
{
$project: {
results: {
$concatArrays: [ "$empty", "$non_empty" ]
}
}
}];
Output:
{
"results": [{
"_id": null,
"value": 52 // count of both '' and null.
}, {
"_id": "Approved",
"value": 83
}, {
"_id": "Rejected",
"value": 3661
}]
}
Changing the $group _id as below solved the problem:
{
"$group": {
"_id": {
"$ifNull": ["$result", ""]
},
"id": {
"$first": "$result"
},
"labelKey": {
"$first": {
"$ifNull": ["$result",
"$result"]
}
},
"value": {
"$sum": 1
}
}
}
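Why this works: $ifNull substitutes the fallback only when the expression evaluates to null or is missing, while an existing empty string is returned as-is, so null, missing, and "" all land under the same "" group key. A quick way to convince yourself in the shell (hypothetical throwaway collection):
db.ifnullCheck.insertMany([
  { "result": "Approved" },
  { "result": "Rejected" },
  { "result": null },
  { "result": "" },
  {} // 'result' missing entirely
])
db.ifnullCheck.aggregate([
  { "$group": { "_id": { "$ifNull": ["$result", ""] }, "value": { "$sum": 1 } } }
])
// => { "_id": "", "value": 3 } plus one group each for "Approved" and "Rejected"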

Is it recommended to use $unwind when working with a large amount of data with nested documents in MongoDB?

I am new to MongoDB and trying to work with nested documents. I have a query as below:
db.EndpointData.aggregate([
{ "$group" : { "_id" : "$EndpointId", "RequestCount" : { "$sum" : 1 }, "FirstActivity" : { "$min" : "$DateTime" }, "LastActivity" : { "$max" : "$DateTime" }, "Tags" : { "$push" : "$Tags" } } },
{ "$unwind" : "$Tags" },
{ "$unwind" : "$Tags" },
{ "$group" : { "_id" : "$_id", "RequestCount" : { "$first" : "$RequestCount" }, "Tags" : { "$push" : "$Tags" }, "FirstActivity" : { "$first" : "$FirstActivity" }, "LastActivity" : { "$first" : "$LastActivity" } } },
{ "$unwind" : "$Tags" },
{ "$unwind" : "$Tags.Sensors" },
{ "$group" : { "_id" : { "EndpointId" : "$_id", "Uid" : "$Tags.Uid", "Type" : "$Tags.Sensors.Type" }, "RequestCount" : { "$first" : "$RequestCount" }, "FirstActivity" : { "$first" : "$FirstActivity" }, "LastActivity" : { "$first" : "$LastActivity" } } },
{ "$group" : { "_id" : { "EndpointId" : "$_id.EndpointId", "Uid" : "$_id.Uid" }, "count" : { "$sum" : 1 }, "RequestCount" : { "$first" : "$RequestCount" }, "FirstActivity" : { "$first" : "$FirstActivity" }, "LastActivity" : { "$first" : "$LastActivity" } } },
{ "$group" : { "_id" : "$_id.EndpointId", "TagCount" : { "$sum" : 1 }, "SensorCount" : { "$sum" : "$count" }, "RequestCount" : { "$first" : "$RequestCount" }, "FirstActivity" : { "$first" : "$FirstActivity" }, "LastActivity" : { "$first" : "$LastActivity" } } }])
and my data structure is as below:
{
"_id": "6aef51dfaf42ea1b70d0c4db",
"EndpointId": "98799bcc-e86f-4c8a-b340-8b5ed53caf83",
"DateTime": "2018-05-06T19:05:02.666Z",
"Url": "test",
"Tags": [
{
"Uid": "C1:3D:CA:D4:45:11",
"Type": 1,
"DateTime": "2018-05-06T19:05:02.666Z",
"Sensors": [
{
"Type": 1,
"Value": { "$numberDecimal": "-95" }
},
{
"Type": 2,
"Value": { "$numberDecimal": "-59" }
},
{
"Type": 3,
"Value": { "$numberDecimal": "11.029802536740132" }
}
]
},
{
"Uid": "C1:3D:CA:D4:45:11",
"Type": 1,
"DateTime": "2018-05-06T19:05:02.666Z",
"Sensors": [
{
"Type": 1,
"Value": { "$numberDecimal": "-92" }
},
{
"Type": 2,
"Value": { "$numberDecimal": "-59" }
}
]
}
]
}
This query works and returns correct results. I count the Tags, Sensors, and number of occurrences for each EndpointId. But the problem is that when I work with a large amount of data (about 10,000,000 documents) I run into memory problems. It seems that having four levels of $unwind causes the problem. How can I reduce the number of unwinds in this query?
As long as your data has unique sensor and tag readings per document, which everything you have presented so far appears to have, then you simply don't need $unwind at all.
In fact, all you really need is a single $group:
db.endpoints.aggregate([
// In reality you would $match to limit the selection of documents
{ "$match": {
"DateTime": { "$gte": new Date("2018-05-01"), "$lt": new Date("2018-06-01") }
}},
{ "$group": {
"_id": "$EndpointId",
"FirstActivity" : { "$min" : "$DateTime" },
"LastActivity" : { "$max" : "$DateTime" },
"RequestCount": { "$sum": 1 },
"TagCount": {
"$sum": {
"$size": { "$setUnion": ["$Tags.Uid",[]] }
}
},
"SensorCount": {
"$sum": {
"$sum": {
"$map": {
"input": { "$setUnion": ["$Tags.Uid",[]] },
"as": "tag",
"in": {
"$size": {
"$reduce": {
"input": {
"$filter": {
"input": {
"$map": {
"input": "$Tags",
"in": {
"Uid": "$$this.Uid",
"Type": "$$this.Sensors.Type"
}
}
},
"cond": { "$eq": [ "$$this.Uid", "$$tag" ] }
}
},
"initialValue": [],
"in": { "$setUnion": [ "$$value", "$$this.Type" ] }
}
}
}
}
}
}
}
}}
])
Or if you actually do need to accumulate those "unique" values of "Sensors" and "Tags" from across different documents, then you still need initial $unwind statements to get the right grouping, but nowhere near as many as you presently have:
db.endpoints.aggregate([
// In reality you would $match to limit the selection of documents
{ "$match": {
"DateTime": { "$gte": new Date("2018-05-01"), "$lt": new Date("2018-06-01") }
}},
{ "$unwind": "$Tags" },
{ "$unwind": "$Tags.Sensors" },
{ "$group": {
"_id": {
"EndpointId": "$EndpointId",
"Uid": "$Tags.Uid",
"Type": "$Tags.Sensors.Type"
},
"FirstActivity": { "$min": "$DateTime" },
"LastActivity": { "$max": "$DateTime" },
"RequestCount": { "$addToSet": "$_id" }
}},
{ "$group": {
"_id": {
"EndpointId": "$_id.EndpointId",
"Uid": "$_id.Uid",
},
"FirstActivity": { "$min": "$FirstActivity" },
"LastActivity": { "$max": "$LastActivity" },
"count": { "$sum": 1 },
"RequestCount": { "$addToSet": "$RequestCount" }
}},
{ "$group": {
"_id": "$_id.EndpointId",
"FirstActivity": { "$min": "$FirstActivity" },
"LastActivity": { "$max": "$LastActivity" },
"TagCount": { "$sum": 1 },
"SensorCount": { "$sum": "$count" },
"RequestCount": { "$addToSet": "$RequestCount" }
}},
{ "$addFields": {
"RequestCount": {
"$size": {
"$reduce": {
"input": {
"$reduce": {
"input": "$RequestCount",
"initialValue": [],
"in": { "$setUnion": [ "$$value", "$$this" ] }
}
},
"initialValue": [],
"in": { "$setUnion": [ "$$value", "$$this" ] }
}
}
}
}}
],{ "allowDiskUse": true })
And from MongoDB 4.0 you can use $toString on the ObjectId within _id and simply merge the unique keys for those in order to keep the RequestCount using $mergeObjects. This is cleaner and a bit more scalable than pushing nested array content and flattening it:
db.endpoints.aggregate([
// In reality you would $match to limit the selection of documents
{ "$match": {
"DateTime": { "$gte": new Date("2018-05-01"), "$lt": new Date("2018-06-01") }
}},
{ "$unwind": "$Tags" },
{ "$unwind": "$Tags.Sensors" },
{ "$group": {
"_id": {
"EndpointId": "$EndpointId",
"Uid": "$Tags.Uid",
"Type": "$Tags.Sensors.Type"
},
"FirstActivity": { "$min": "$DateTime" },
"LastActivity": { "$max": "$DateTime" },
"RequestCount": {
"$mergeObjects": {
"$arrayToObject": [[{ "k": { "$toString": "$_id" }, "v": 1 }]]
}
}
}},
{ "$group": {
"_id": {
"EndpointId": "$_id.EndpointId",
"Uid": "$_id.Uid",
},
"FirstActivity": { "$min": "$FirstActivity" },
"LastActivity": { "$max": "$LastActivity" },
"count": { "$sum": 1 },
"RequestCount": { "$mergeObjects": "$RequestCount" }
}},
{ "$group": {
"_id": "$_id.EndpointId",
"FirstActivity": { "$min": "$FirstActivity" },
"LastActivity": { "$max": "$LastActivity" },
"TagCount": { "$sum": 1 },
"SensorCount": { "$sum": "$count" },
"RequestCount": { "$mergeObjects": "$RequestCount" }
}},
{ "$addFields": {
"RequestCount": {
"$size": {
"$objectToArray": "$RequestCount"
}
}
}}
],{ "allowDiskUse": true })
Either form returns the same data, though the order of keys in the result may vary:
{
"_id" : "89799bcc-e86f-4c8a-b340-8b5ed53caf83",
"FirstActivity" : ISODate("2018-05-06T19:05:02.666Z"),
"LastActivity" : ISODate("2018-05-06T19:05:02.666Z"),
"RequestCount" : 2,
"TagCount" : 4,
"SensorCount" : 16
}
The result is obtained from the sample documents you originally supplied as the source data in the original question on the topic:
{
"_id" : ObjectId("5aef51dfaf42ea1b70d0c4db"),
"EndpointId" : "89799bcc-e86f-4c8a-b340-8b5ed53caf83",
"DateTime" : ISODate("2018-05-06T19:05:02.666Z"),
"Url" : "test",
"Tags" : [
{
"Uid" : "C1:3D:CA:D4:45:11",
"Type" : 1,
"DateTime" : ISODate("2018-05-06T19:05:02.666Z"),
"Sensors" : [
{
"Type" : 1,
"Value" : NumberDecimal("-95")
},
{
"Type" : 2,
"Value" : NumberDecimal("-59")
},
{
"Type" : 3,
"Value" : NumberDecimal("11.029802536740132")
},
{
"Type" : 4,
"Value" : NumberDecimal("27.25")
},
{
"Type" : 6,
"Value" : NumberDecimal("2924")
}
]
},
{
"Uid" : "C1:3D:CA:D4:45:11",
"Type" : 1,
"DateTime" : ISODate("2018-05-06T19:05:02.666Z"),
"Sensors" : [
{
"Type" : 1,
"Value" : NumberDecimal("-95")
},
{
"Type" : 2,
"Value" : NumberDecimal("-59")
},
{
"Type" : 3,
"Value" : NumberDecimal("11.413037961112279")
},
{
"Type" : 4,
"Value" : NumberDecimal("27.25")
},
{
"Type" : 6,
"Value" : NumberDecimal("2924")
}
]
},
{
"Uid" : "E5:FA:2A:35:AF:DD",
"Type" : 1,
"DateTime" : ISODate("2018-05-06T19:05:02.666Z"),
"Sensors" : [
{
"Type" : 1,
"Value" : NumberDecimal("-97")
},
{
"Type" : 2,
"Value" : NumberDecimal("-58")
},
{
"Type" : 3,
"Value" : NumberDecimal("10.171658037099185")
}
]
}
]
}
/* 2 */
{
"_id" : ObjectId("5aef51e0af42ea1b70d0c4dc"),
"EndpointId" : "89799bcc-e86f-4c8a-b340-8b5ed53caf83",
"Url" : "test",
"Tags" : [
{
"Uid" : "E2:02:00:18:DA:40",
"Type" : 1,
"DateTime" : ISODate("2018-05-06T19:05:04.574Z"),
"Sensors" : [
{
"Type" : 1,
"Value" : NumberDecimal("-98")
},
{
"Type" : 2,
"Value" : NumberDecimal("-65")
},
{
"Type" : 3,
"Value" : NumberDecimal("7.845424441900629")
},
{
"Type" : 4,
"Value" : NumberDecimal("0.0")
},
{
"Type" : 6,
"Value" : NumberDecimal("3012")
}
]
},
{
"Uid" : "12:3B:6A:1A:B7:F9",
"Type" : 1,
"DateTime" : ISODate("2018-05-06T19:05:04.574Z"),
"Sensors" : [
{
"Type" : 1,
"Value" : NumberDecimal("-95")
},
{
"Type" : 2,
"Value" : NumberDecimal("-59")
},
{
"Type" : 3,
"Value" : NumberDecimal("12.939770381907275")
}
]
}
]
}
The bottom line is that you can either use the first form given here, which accumulates "within each document" and then "per endpoint" in a single stage and is the most optimal, or you actually need to identify things like the "Uid" on the tags or the "Type" on the sensors where those values occur more than once over any combination of documents grouped by endpoint.
Your sample data supplied to date only shows that these values are "unique within each document", therefore the first given form would be most optimal if this is the case for all remaining data.
In the event that it is not, then "unwinding" the two nested arrays in order to "aggregate the detail across documents" is the only way to approach this. You can limit the date range or other criteria as most "queries" typically have some bounds and do not actually work on the "whole" collection data, but the main fact remains that arrays would be "unwound" creating essentially a document copy for every array member.
The point on optimization is that you only need to do this "twice", as there are only two arrays. Doing successive $group then $unwind then $group is always a sure sign you are doing something really wrong. Once you "take something apart" you should only ever need to "put it back together" once, and the series of graded steps demonstrated here is that once-only approach.
Outside of the scope of your question, there still remains:
Add other realistic constraints to the query to reduce the documents processed, maybe even do so in "batches" and combine results
Add the allowDiskUse option to the pipeline to let temporary storage be used (actually demonstrated in the commands above).
Consider that "nested arrays" are probably not the best storage method for the analysis you want to do. When you know you need to $unwind, it is always more efficient to simply write the data in that "unwound" form directly into a collection, as sketched below.
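For example, a minimal sketch of that last point, materializing the flattened readings once so that later analysis never has to $unwind (the endpoint_readings target name is an assumption; note that $out replaces the target collection wholesale):
db.endpoints.aggregate([
  { "$unwind": "$Tags" },
  { "$unwind": "$Tags.Sensors" },
  { "$project": {
      "_id": 0,
      "EndpointId": 1,
      "DateTime": 1,
      "Uid": "$Tags.Uid",
      "SensorType": "$Tags.Sensors.Type",
      "Value": "$Tags.Sensors.Value"
  }},
  // Write the unwound form once; subsequent aggregations read this collection directly
  { "$out": "endpoint_readings" }
], { "allowDiskUse": true })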
If you're dealing with data on the order of 10,000,000 documents, you're going to run into aggregation pipeline size limits easily. Specifically, according to the MongoDB documentation, there is a pipeline RAM use limit of 100MB. If each document has at least 10 bytes of data, then that's enough to hit that limit, and your documents would absolutely exceed that amount.
There are a few options available to you to resolve this problem:
1) You can use the allowDiskUse option as noted in the documentation.
2) You can project your documents further between unwind stages to limit document size (very unlikely to be enough on its own).
3) You can periodically generate summary documents on subsets of your data, and then perform your aggregations on those summary documents. If, for example, you run summary documents on subsets of size 1,000, you can reduce the number of documents in your pipelines from 10,000,000 to just 10,000.
4) You can look into sharding your collection and running these aggregate operations on a cluster to reduce the load on any single server.
Options 1 and 2 are both very short-term solutions. They're easy to implement, but won't help much in the long run. Options 3 and 4, however, are far more involved and trickier to implement, but will provide the greatest amount of scalability and are more likely to continue meeting your needs long-term.
Do be warned, however, that if you plan to approach option 4, you need to be very prepared. A sharded collection cannot be unsharded, and messing up can cause potentially irreparable data loss. Having a dedicated DBA with experience with MongoDB clusters is recommended.
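As a rough illustration of option 3, a scheduled job could roll one day's worth of raw documents up into a summary collection keyed by endpoint and day. This is only a sketch: it assumes MongoDB 4.2+ for $merge, and the summary collection name, date window, and per-day bucketing are all hypothetical choices:
db.EndpointData.aggregate([
  // Summarize a single day's documents per endpoint
  { "$match": { "DateTime": { "$gte": ISODate("2018-05-06T00:00:00Z"), "$lt": ISODate("2018-05-07T00:00:00Z") } } },
  { "$group": {
      "_id": {
        "EndpointId": "$EndpointId",
        "Day": { "$dateToString": { "format": "%Y-%m-%d", "date": "$DateTime" } }
      },
      "RequestCount": { "$sum": 1 },
      "FirstActivity": { "$min": "$DateTime" },
      "LastActivity": { "$max": "$DateTime" }
  }},
  // Upsert into the summary collection; re-running the same day simply replaces that day's rollup
  { "$merge": { "into": "EndpointDailySummary", "on": "_id", "whenMatched": "replace", "whenNotMatched": "insert" } }
])
Later aggregations can then group the (much smaller) EndpointDailySummary collection instead of the raw data.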

Rewind data of two nested array fields after $unwind, $lookup, and $filter on date range in $project

{
"_id" : ObjectId("590b12b6330e1567acd29e69"),
"name": "Foo",
"sales_history" : [
{
"_id" : ObjectId("593ce8e4cfaa652df543d9e3"),
"sold_at" : ISODate("2017-06-11T06:53:24.881Z"),
"sold_to" : ObjectId("593509e938792e046ba14a02"),
"sold_products" : [
{
"product_dp" : 100,
"quantity" : 1,
"product_id" : ObjectId("591068be1f4c6c79a442a788"),
"_id" : ObjectId("593ce8e4cfaa652df543d9e5")
},
{
"product_dp" : 100,
"quantity" : 1,
"product_id" : ObjectId("593a33dccfaa652df543d924"),
"_id" : ObjectId("593ce8e4cfaa652df543d9e4")
}
]
},
{
"_id" : ObjectId("5944cb7142a04740357020b9"),
"sold_at" : ISODate("2017-06-17T06:25:53.332Z"),
"sold_to" : ObjectId("5927d4a59e58ba0c61066f3b"),
"sold_products" : [
{
"product_dp" : 500,
"quantity" : 1,
"price" : 5650,
"product_id" : ObjectId("593191ed53a2741dd9bffeb5"),
"_id" : ObjectId("5944cb7142a04740357020ba")
}
]
}
]
}
I have a User schema like this. I want the details of the product_id reference, with a date-range search criterion on the sold_at date field.
My expected data is like the following when I search sold_at for 2017-06-11:
{
"_id" : ObjectId("590b12b6330e1567acd29e69"),
"name": "Foo",
"sales_history" : [
{
"_id" : ObjectId("593ce8e4cfaa652df543d9e3"),
"sold_at" : ISODate("2017-06-11T06:53:24.881Z"),
"sold_to" : ObjectId("593509e938792e046ba14a02"),
"sold_products" : [
{
"product_dp" : 100,
"quantity" : 1,
"product_id": {
_id:ObjectId("hsfgg123412yh3gy1u2g3"),
name: "Product1",
code: "FG0154"
},
}
]
}
]
}
Product details need to be populated into product_id, and the sales_history array needs to be filtered by the date range.
You can try the aggregation query below:
$filter sales_history on the date range, followed by $unwind on sales_history and sold_products.
$lookup on sold_products to get the product details.
$group sold_products and sales_history back together.
db.collection.aggregate([
{
"$project": {
"name": 1,
"sales_history": {
"$filter": {
"input": "$sales_history",
"as": "history",
"cond": {
"$and": [
{
"$gte": [
"$$history.sold_at",
ISODate("2017-06-11T00:00:00.000Z")
]
},
{
"$lt": [
"$$history.sold_at",
ISODate("2017-06-12T00:00:00.000Z")
]
}
]
}
}
}
}
},
{
"$unwind": "$sales_history"
},
{
"$unwind": "$sales_history.sold_products"
},
{
"$lookup": {
"from": lookupcollection,
"localField": "sales_history.sold_products.product_id",
"foreignField": "_id",
"as": "sales_history.sold_products.product_id"
}
},
{
"$group": {
"_id": {
"_id": "$_id",
"sales_history_id": "$sales_history._id"
},
"name": {
"$first": "$name"
},
"sold_at": {
"$first": "$sales_history.sold_at"
},
"sold_to": {
"$first": "$sales_history.sold_to"
},
"sold_products": {
"$push": "$sales_history.sold_products"
}
}
},
{
"$group": {
"_id": "$_id._id",
"name": {
"$first": "$name"
},
"sales_history": {
"$push": {
"_id": "$_id.sales_history_id",
"sold_at": "$sold_at",
"sold_to": "$sold_to",
"sold_products": "$sold_products"
}
}
}
}
]);

How to find sum of total value from inner array?

{
"_id" : ObjectId("56fb04fd2e6bb8bc059287c9"),
"BillNo" : "Bill_001",
"DateP" : "12-12-2015",
"Type" : "Cash",
"Items" : [
{
"id" : NumberInt(1),
"ItemName" : "cement",
"Qty" : "100",
"Rate" : "10",
"Total" : "1000"
},
{
"id" : NumberInt(2),
"ItemName" : "steel",
"Qty" : "10",
"Rate" : "50",
"Total" : "500"
},
{
"id" : NumberInt(3),
"ItemName" : "sand",
"Qty" : "1",
"Rate" : "1500",
"Total" : "1500"
}
]
}
{
"_id" : ObjectId("56fb05382e6bb8bc059287ca"),
"BillNo" : "Bill_002",
"DateP" : "12-10-2015",
"Type" : "Cash",
"Items" : [
{
"id" : NumberInt(1),
"ItemName" : "Paint",
"Qty" : "50",
"Rate" : "100",
"Total" : "5000"
},
{
"id" : NumberInt(2),
"ItemName" : "Brush",
"Qty" : "5",
"Rate" : "10",
"Total" : "50"
}
]
}
The above collection stores all the purchase details in the main document, with the item details stored as an inner array. I need to get a result like the following using MongoDB. How do I find the total from the inner array in MongoDB?
Bill_001 1500
Bill_002 5050
Ideally in MongoDB you can use $sum both as a $group accumulator and in its newer role of adding the members of a provided array, combined with $map:
db.collection.aggregate([
{ "$group": {
"_id": "$BillNo",
"Total": {
"$sum": {
"$sum": {
"$map": {
"input": "$Items",
"as": "item",
"in": "$$item.Total"
}
}
}
}
}}
])
Or just per document:
db.collection.aggregate([
{ "$group": {
"_id": "$_id",
"BillNo": { "$first": "$BillNo" },
"DateP": { "$first" "$DateP" },
"Type": { "$first": "$Type" }
"Total": {
"$sum": {
"$sum": {
"$map": {
"input": "$Items",
"as": "item",
"in": "$$item.Total"
}
}
}
}
}}
])
This uses the other accumulator, $first, for the remaining fields. Of course, with MongoDB 3.2 you could really just use $project:
db.collection.aggregate([
{ "$project": {
"BillNo": 1,
"DateP": 1,
"Type": 1,
"Total": {
"$sum": {
"$map": {
"input": "$Items",
"as": "item",
"in": "$$item.Total"
}
}
}
}}
])
In older versions you still need $unwind on the array first:
db.collection.aggregate([
{ "$unwind": "$Items" },
{ "$group": {
"_id": "$BillNo",
"Total": {
"$sum": "$Items.Total"
}
}}
])
Or if you are only adding per document:
db.collection.aggregate([
{ "$unwind": "$Items" },
{ "$group": {
"_id": "_id",
"BillNo": { "$first": "$BillNo" },
"DateP": { "$first": "$DateP" },
"Type": { "$first": "$Type" },
"Total": {
"$sum": "$Items.Total"
}
}}
])
But of course, all of this only works once you actually fix the strings to be numeric values.
Ideally you can fix it like this:
var ops = [];
db.collection.find().forEach(function(doc) {
doc.Items.forEach(function(item) {
ops.push({
"updateOne": {
"filter": { "_id": doc._id, "Items.id": item.id },
"update": {
"$set": {
"Items.$.Qty": parseInt(item.Qty),
"Items.$.Rate": parseInt(item.Rate),
"Items.$Total": parseInt(item.Total)
}
}
}
});
// Send batch of updates
if ( ops.length == 1000 ) {
db.collection.bulkWrite(ops);
ops = [];
}
});
});
// Clear any unprocessed updates
if ( ops.length > 0 ) {
db.collection.bulkWrite(ops);
}
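Alternatively, on MongoDB 4.2+ a single updateMany with an aggregation-pipeline update can do the same conversion server-side, without round-tripping documents through the client. A minimal sketch under that version assumption:
db.collection.updateMany({}, [
  { "$set": {
      "Items": {
        "$map": {
          "input": "$Items",
          "as": "item",
          "in": {
            "$mergeObjects": [
              "$$item",
              {
                // Convert the string fields in place, keeping all other fields as-is
                "Qty": { "$toInt": "$$item.Qty" },
                "Rate": { "$toInt": "$$item.Rate" },
                "Total": { "$toInt": "$$item.Total" }
              }
            ]
          }
        }
      }
  }}
])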