I have two different arrays of data and I need to merge the matching objects from both arrays into a single object. How can I do this with MongoDB aggregation?
here are my two Arrays
{"ids" : [
{
"_id" : ObjectId("5ba8d8dfaa988532967029af"),
"level" : 2,
"completed" : 5,
"asset" : ObjectId("5ba8caa1aa98853296702989")
},
{
"_id" : ObjectId("5ba8d8dfaa988532967029b0"),
"level" : 2,
"completed" : 3,
"asset" : ObjectId("5ba8caf6aa9885329670298a")
},
{
"_id" : ObjectId("5ba8d8dfaa988532967029b1"),
"level" : 2,
"asset" : ObjectId("5ba8cb09aa9885329670298b")
}]}
{"total" : [
{
"total" : 1,
"asset" : ObjectId("5ba8caa1aa98853296702989"),
"level" : 2
},
{
"total" : 1,
"asset" : ObjectId("5ba8caf6aa9885329670298a"),
"level" : 2
},
{
"total" : 1,
"asset" : ObjectId("5ba8cb09aa9885329670298b"),
"level" : 2
}]}
The data in the two arrays is shown above. I want to merge only the objects whose asset and level are the same.
Expected Result:[{
"level" : 2,
"asset" : ObjectId("5ba8caa1aa98853296702989"),
"total" : 1,
"completed" : 5,
},{
"level" : 2,
"asset" : ObjectId("5ba8caf6aa9885329670298a"),
"total" : 1,
"completed" : 3,
}]
If you already have the two arrays then you can do it like this. But if you need to get the result from MongoDB then you should use $mergeObjects in a MongoDB aggregation.
// Sample "ids" data: completed counts per (asset, level).
var a = [
  {
    "_id" : "5ba8d8dfaa988532967029af",
    "level" : 2,
    "completed" : 5,
    "asset" : "5ba8caa1aa98853296702989"
  },
  {
    "_id" : "5ba8d8dfaa988532967029b0",
    "level" : 2,
    "completed" : 3,
    "asset" : "5ba8caf6aa9885329670298a"
  }];
// Sample "total" data: totals per (asset, level).
var b = [
  {
    "total" : 1,
    "asset" : "5ba8caa1aa98853296702989",
    "level" : 2
  },
  {
    "total" : 1,
    "asset" : "5ba8caf6aa9885329670298a",
    "level" : 2
  }];
var output = [];
// Merge each object in `obj` with the object in `src` that shares the
// same (asset, level) pair, pushing the combined object into `output`.
// The original index-based comparison only merged entries when both
// arrays were aligned position-for-position, and threw a TypeError
// when `src` was longer than `obj`; a lookup table keyed on
// asset + level handles any ordering and any lengths.
function extend(obj, src) {
  var srcByKey = new Map();
  src.forEach(function (item) {
    srcByKey.set(item.asset + "|" + item.level, item);
  });
  obj.forEach(function (item) {
    var match = srcByKey.get(item.asset + "|" + item.level);
    if (match !== undefined) {
      // Later spread wins for shared keys (asset/level are equal anyway).
      output.push({ ...item, ...match });
    }
  });
}
extend(a, b);
console.log(output)
You can try below aggregation
// Merge the "total" entries with the "completed" value of the "ids"
// entry that has the same asset, by positional lookup into "$ids".
db.collection.aggregate([
  { "$project": {
    "Result": {
      "$map": {
        "input": "$total",
        "as": "c",
        "in": {
          "total": "$$c.total",
          "level": "$$c.level",
          "asset": "$$c.asset",
          "completed": {
            "$let": {
              "vars": {
                "idx": { "$indexOfArray": ["$ids.asset", "$$c.asset"] }
              },
              "in": {
                // FIX: $indexOfArray yields -1 when the asset is not
                // found, and $arrayElemAt with -1 returns the LAST
                // element — silently attaching a wrong "completed".
                // Guard the not-found case and omit the field instead.
                "$cond": [
                  { "$lt": ["$$idx", 0] },
                  "$$REMOVE",
                  // NOTE(review): "$ids.completed" only contains values
                  // from ids entries that HAVE the field, so indexes can
                  // misalign if an early entry lacks "completed" —
                  // confirm against real data.
                  { "$arrayElemAt": ["$ids.completed", "$$idx"] }
                ]
              }
            }
          }
        }
      }
    }
  }}
])
Output
[
{
"Result": [
{
"asset": ObjectId("5ba8caa1aa98853296702989"),
"completed": 5,
"level": 2,
"total": 1
},
{
"asset": ObjectId("5ba8caf6aa9885329670298a"),
"completed": 3,
"level": 2,
"total": 1
},
{
"asset": ObjectId("5ba8cb09aa9885329670298b"),
"level": 2,
"total": 1
}
]
}
]
Related
Here is my some sample data in collection sale
[
{group:2, item:a, qty:3 },
{group:2, item:b, qty:3 },
{group:2, item:b, qty:2 },
{group:1, item:a, qty:3 },
{group:1, item:a, qty:5 },
{group:1, item:b, qty:5 }
]
and I want to query data like below and sort the popular group to the top
[
{ group:1, items:[{name:'a',total_qty:8},{name:'b',total_qty:5} ],total_qty:13 },
{ group:2, items:[{name:'a',total_qty:3},{name:'b',total_qty:5} ],total_qty:8 },
]
Actually we can loop in server script( php, nodejs ...) but the problem is pagination. I cannot use skip to get the right result.
The following query can get us the expected output:
db.collection.aggregate([
  // Stage 1: one document per (group, item) pair with the summed qty.
  {
    $group:{
      "_id":{
        "group":"$group",
        "item":"$item"
      },
      "group":{
        $first:"$group"
      },
      "item":{
        $first:"$item"
      },
      "total_qty":{
        $sum:"$qty"
      }
    }
  },
  // Stage 2: collapse the per-item documents into one document per
  // group, embedding the item subtotals and summing the group's total.
  {
    $group:{
      "_id":"$group",
      "group":{
        $first:"$group"
      },
      "items":{
        $push:{
          "name":"$item",
          "total_qty":"$total_qty"
        }
      },
      "total_qty":{
        $sum:"$total_qty"
      }
    }
  },
  // FIX: the question asks for the most popular group first; $group
  // output order is undefined, so an explicit $sort is required.
  {
    $sort:{
      "total_qty":-1
    }
  },
  // Drop the internal _id; the "group" field already carries the key.
  {
    $project:{
      "_id":0
    }
  }
]).pretty()
Data set:
{
"_id" : ObjectId("5d84a37febcbd560107c54a7"),
"group" : 2,
"item" : "a",
"qty" : 3
}
{
"_id" : ObjectId("5d84a37febcbd560107c54a8"),
"group" : 2,
"item" : "b",
"qty" : 3
}
{
"_id" : ObjectId("5d84a37febcbd560107c54a9"),
"group" : 2,
"item" : "b",
"qty" : 2
}
{
"_id" : ObjectId("5d84a37febcbd560107c54aa"),
"group" : 1,
"item" : "a",
"qty" : 3
}
{
"_id" : ObjectId("5d84a37febcbd560107c54ab"),
"group" : 1,
"item" : "a",
"qty" : 5
}
{
"_id" : ObjectId("5d84a37febcbd560107c54ac"),
"group" : 1,
"item" : "b",
"qty" : 5
}
Output:
{
"group" : 2,
"items" : [
{
"name" : "b",
"total_qty" : 5
},
{
"name" : "a",
"total_qty" : 3
}
],
"total_qty" : 8
}
{
"group" : 1,
"items" : [
{
"name" : "b",
"total_qty" : 5
},
{
"name" : "a",
"total_qty" : 8
}
],
"total_qty" : 13
}
You need to use $group aggregation with $sum and $push accumulator
// Group by "group", collecting the documents and the overall quantity,
// then sort so the most popular (highest total_qty) group comes first.
db.collection.aggregate([
{ "$group": {
"_id": "$group",
// NOTE(review): $$ROOT pushes the whole source document (including
// _id and qty); the expected output wants per-item subtotals, which
// needs a preliminary $group by (group, item) — confirm intent.
"items": { "$push": "$$ROOT" },
"total_qty": { "$sum": "$qty" }
}},
{ "$sort": { "total_qty": -1 }}
])
I have 3 collections: parents, children and links with subset of data like:
Parents:
{ "_id": 1, "PID" : 1, "Pname" : "Joe", "Sal" : 20000 },
{ "_id": 2, "PID" : 2, "Pname" : "Jim", "Sal" : 14100 },
{ "_id": 3, "PID" : 3, "Pname" : "Bob", "Sal" : 13500 },
{ "_id": 4, "PID" : 4, "Pname" : "Amy", "Sal" : 12000 },
{ "_id": 5, "PID" : 5, "Pname" : "George", "Sal" : 10000 }
Children:
{ "_id" : 1, "CID" : 1, "Cname" : "Ronney", "Age" : 10 },
{ "_id" : 2, "CID" : 2, "Cname" : "Mo", "Age" : 11 },
{ "_id" : 3, "CID" : 3, "Cname" : "Adam", "Age" : 13 },
{ "_id" : 4, "CID" : 4, "Cname" : "Eve", "Age" : 21 },
{ "_id" : 5, "CID" : 5, "Cname" : "Johny", "Age" : 19 },
{ "_id" : 6, "CID" : 6, "Cname" : "Sammy", "Age" : 25 },
{ "_id" : 7, "CID" : 7, "Cname" : "Sammy", "Age" : 23 }
Links:
{ "_id" : 1, "PID" : 1, "CID" : 1 },
{ "_id" : 2, "PID" : 1, "CID" : 3 },
{ "_id" : 3, "PID" : 2, "CID" : 5 },
{ "_id" : 4, "PID" : 2, "CID" : 7 },
{ "_id" : 5, "PID" : 2, "CID" : 2 },
{ "_id" : 6, "PID" : 4, "CID" : 4 },
{ "_id" : 7, "PID" : 5, "CID" : 6 }
I need to $push an array of children names into to the parents collection using the links collections which tie parent id to child id. So, for example Parent 1 will have:
{ "_id" : 1, "PID" : 1, "Pname" : "Joe", "Sal" : 20000, "Children" : ["Ronney", "Adam"] }
I think I can use a nested foreach loops to achieve this, but I am confused about how.
Any help would be greatly appreciated!
You can forego the usage of "nested loops" with any MongoDB version supporting the $lookup aggregation pipeline operation. This will allow the "joining" of the data from multiple sources into one result set.
Given the collections "parents", "children" and "links", you want to perform two $lookup operations followed by $unwind statements to get the related data from "links" first and then join that to the "children".
Finally for convenience you can $group to get the child names into an array for each parent:
// Join parents -> links -> children and collect each parent's child
// names into an array. Note: each $unwind drops parents that have no
// matching "links" entries (e.g. PID 3 never appears in the output),
// so only parents that actually have children are produced.
db.parents.aggregate([
// First join: attach the link documents matching this parent's PID.
{ "$lookup": {
"from": "links",
"localField": "PID",
"foreignField": "PID",
"as": "children"
}},
{ "$unwind": "$children" },
// Second join: replace each link with its matching child document
// (reusing the "children" field name for the joined result).
{ "$lookup": {
"from": "children",
"localField": "children.CID",
"foreignField": "CID",
"as": "children"
}},
{ "$unwind": "$children" },
// Re-group per parent, keeping just the child names.
{ "$group": {
"_id": "$_id",
"children": { "$push": "$children.Cname" }
}}
])
Which on your data sample gives output like:
{ "_id" : 4, "children" : [ "Eve" ] }
{ "_id" : 5, "children" : [ "Sammy" ] }
{ "_id" : 2, "children" : [ "Johny", "Sammy", "Mo" ] }
{ "_id" : 1, "children" : [ "Ronney", "Adam" ] }
Now you want to use that data output as a basis to loop and update the appropriate "parent" documents. Taking into consideration that the actual data can and will likely be much larger:
// Buffer of pending bulkWrite operations, flushed in batches.
let ops = [];
db.parents.aggregate([
  { "$lookup": {
    "from": "links",
    "localField": "PID",
    "foreignField": "PID",
    "as": "children"
  }},
  { "$unwind": "$children" },
  { "$lookup": {
    "from": "children",
    "localField": "children.CID",
    "foreignField": "CID",
    "as": "children"
  }},
  { "$unwind": "$children" },
  { "$group": {
    "_id": "$_id",
    "children": { "$push": "$children.Cname" }
  }}
]).forEach(doc => {
  // FIX: push() appends in place; the original rebuilt the whole array
  // with a spread on every iteration, which is O(n^2) over the batch.
  ops.push({
    "updateOne": {
      "filter": { "_id": doc._id },
      "update": {
        "$push": { "Children": { "$each": doc.children } }
      }
    }
  });
  // Flush in batches of 500 to keep memory bounded on large data sets.
  if (ops.length >= 500) {
    db.parents.bulkWrite(ops);
    ops = [];
  }
});
// Flush whatever remains in the final partial batch.
if (ops.length !== 0) {
  db.parents.bulkWrite(ops);
  ops = [];
}
Which then amends your data as:
{ "_id" : 1, "PID" : 1, "Pname" : "Joe", "Sal" : 20000, "Children" : [ "Ronney", "Adam" ] }
{ "_id" : 2, "PID" : 2, "Pname" : "Jim", "Sal" : 14100, "Children" : [ "Johny", "Sammy", "Mo" ] }
{ "_id" : 3, "PID" : 3, "Pname" : "Bob", "Sal" : 13500 }
{ "_id" : 4, "PID" : 4, "Pname" : "Amy", "Sal" : 12000, "Children" : [ "Eve" ] }
{ "_id" : 5, "PID" : 5, "Pname" : "George", "Sal" : 10000, "Children" : [ "Sammy" ] }
The alternative to using the $push operator would be to use $set instead to write the whole array at once. But it is generally safer to use $push or $addToSet, which accounts for the "possibility" that an array may already exist at the location and that the intent is to "add" rather than "overwrite".
I have been trying to group and count registration collection data for a stats page, as well as to make for dynamic registration, but I can't get it to count for more than one grouping.
Sample registration collection data:
{
"_id" : ObjectId("58ec60078cc818505fb75ace"),
"event" : "Women's BB",
"day" : "Saturday",
"group" : "nonpro",
"division" : "Women's",
"level" : "BB"
}
{
"_id" : ObjectId("58ec60078cc818505fb75acf"),
"event" : "Coed BB",
"day" : "Sunday",
"group" : "nonpro",
"division" : "Coed",
"level" : "BB"
}
{
"_id" : ObjectId("58ec60098cc818505fb75ad0"),
"event" : "Men's BB",
"day" : "Saturday",
"group" : "nonpro",
"division" : "Men's",
"level" : "BB"
}
{
"_id" : ObjectId("58ec60168cc818505fb75ad1"),
"event" : "Men's B",
"day" : "Saturday",
"group" : "nonpro",
"division" : "Men's",
"level" : "B"
}
{
"_id" : ObjectId("58ec60178cc818505fb75ad2"),
"event" : "Women's Open",
"day" : "Saturday",
"group" : "pro",
"division" : "Women's",
"level" : "Pro"
}
{
"_id" : ObjectId("58ec60188cc818505fb75ad3"),
"event" : "Men's Open",
"day" : "Saturday",
"group" : "pro",
"division" : "Men's",
"level" : "Pro"
}
I'd like to reorganize it and do counts returning something like this:
[ {_id: { day: "Saturday", group: "nonpro" },
count: 3,
divisions: [
{ division: "Men's",
count: 2,
levels: [
{ level: "BB", count: 1 },
{ level: "B", count: 1 }]
},
{ division: "Women's",
count: 1,
levels: [
{ level: "BB", count: 1 }]
}
},
{_id: { day: "Saturday", group: "pro" },
count: 2,
divisions: [
{ division: "Men's",
count: 1,
levels: [
{ level: "Pro", count: 1 }
},
{ division: "Women's",
count: 1,
levels: [
{ level: "Pro", count: 1 }]
}
},
{_id: { day: "Sunday", group: "nonpro" },
count: 1,
divisions: [
{ division: "Coed",
count: 1,
levels: [
{ level: "BB", count: 1 }
}
}]
I know I should be using the aggregate() function, but am having a hard time making it work with the count. Here is what my aggregate looks like so far:
// The asker's attempt: a single $group by (day, group). It yields the
// per-day/group totals only — the nested division/level counts the
// question wants need additional $group stages (see the answer below).
Registration
.aggregate(
{ $group: {
_id: { day: "$day", group: "$group" },
events: { $addToSet: { division: "$division", level: "$level"} },
total: { $sum: 1}
}
})
This returns the total registrations per day/group combination, but if I try adding total: {$sum: 1} to the events set, I just get 1 (which makes sense). Is there a way to make this work in one database call, or do I need to do it separately for each level of grouping I need counts for?
You essentially need 3 levels of $group pipeline stages. The first one will group the documents by all four keys i.e. day, group, division and level. Aggregate the counts for the group
which will be the counts for the level.
The preceding group will take three keys i.e. day, group and division and the aggregate count will sum the previous group counts as well as creating the levels array.
The last group will be the day and group keys + the divisions list embedded with the results from the previous group.
Consider running the following pipeline for the expected results:
// Three-level roll-up: finest grouping first, then progressively merge
// the previous stage's results upward while summing its counts.
Registration.aggregate([
// Level 1: group by all four keys; "count" is the per-level count.
{
"$group": {
"_id": {
"day": "$day",
"group": "$group",
"division": "$division",
"level": "$level"
},
"count": { "$sum": 1 }
}
},
// Level 2: drop "level" from the key; sum the level counts into the
// division count and embed the levels as an array.
{
"$group": {
"_id": {
"day": "$_id.day",
"group": "$_id.group",
"division": "$_id.division"
},
"count": { "$sum": "$count" },
"levels": {
"$push": {
"level": "$_id.level",
"count": "$count"
}
}
}
},
// Level 3: group to (day, group); sum the division counts and embed
// the divisions (each carrying its levels array).
{
"$group": {
"_id": {
"day": "$_id.day",
"group": "$_id.group"
},
"count": { "$sum": "$count" },
"divisions": {
"$push": {
"division": "$_id.division",
"count": "$count",
"levels": "$levels"
}
}
}
}
], (err, results) => {
if (err) throw err;
console.log(JSON.stringify(results, null, 4));
})
Sample Output
/* 1 */
{
"_id" : {
"day" : "Saturday",
"group" : "nonpro"
},
"count" : 3,
"divisions" : [
{
"division" : "Women's",
"count" : 1,
"levels" : [
{
"level" : "BB",
"count" : 1
}
]
},
{
"division" : "Men's",
"count" : 2,
"levels" : [
{
"level" : "BB",
"count" : 1
},
{
"level" : "B",
"count" : 1
}
]
}
]
}
/* 2 */
{
"_id" : {
"day" : "Saturday",
"group" : "pro"
},
"count" : 2,
"divisions" : [
{
"division" : "Women's",
"count" : 1,
"levels" : [
{
"level" : "Pro",
"count" : 1
}
]
},
{
"division" : "Men's",
"count" : 1,
"levels" : [
{
"level" : "Pro",
"count" : 1
}
]
}
]
}
/* 3 */
{
"_id" : {
"day" : "Sunday",
"group" : "nonpro"
},
"count" : 1,
"divisions" : [
{
"division" : "Coed",
"count" : 1,
"levels" : [
{
"level" : "BB",
"count" : 1
}
]
}
]
}
Considering the following document in my mongo DB instance :
{
"_id": 1,
"people": [
{"id": 1, "name": "foo"},
{"id": 2, "name": "bar"},
/.../
],
"stats": [
{"peopleId": 1, "workHours": 24},
{"peopleId": 2, "workHours": 36},
/.../
]
}
Each element in my collection represent the work of every employee in my company, each weeks. As an important note, peopleId may change from one week to another !
I would like to get all weeks where foo worked more than 24 hours. As you can see, the format is kinda annoying since the people name and the work hours are separated in my database. A simple $and is not enough.
I wonder if, using some $ and $elemMatch I can achieve doing this query.
Can I use this to group the "people" entities with "stats" entities ?
Query to get foo worked more than 24 hours.
// Find documents where "foo" worked more than 24 hours.
db.collection.aggregate([
// Unwinding both arrays produces the cross product of every
// people/stats pair within each document.
{$unwind: { path : "$people"}},
{$unwind: { path : "$stats"}},
// Keep only the pairs for the person of interest.
{$match: { "people.name" : "foo"}},
// Per document, collect the people ids whose paired stats entry
// matches on id AND exceeds 24 hours; non-matching pairs contribute
// a sentinel string instead of an id.
{$group: {
_id: "$_id",
peopleIdMoreThan24: { $addToSet: {
$cond : { if : { $and : [ {"$eq" : ["$people.id", "$stats.peopleId" ] },
{"$gt" : ["$stats.workHours", 24] }]} , then : "$people.id", else: "Not satisfying the condition"}}}
}
},
// Flatten the set and discard the sentinel entries, leaving only
// documents that had at least one qualifying pair.
{$unwind: { path : "$peopleIdMoreThan24" }},
{$match: { "peopleIdMoreThan24" : {$nin : [ "Not satisfying the condition"]}}},
]);
Data in collection:-
/* 1 */
{
"_id" : 1,
"people" : [
{
"id" : 1,
"name" : "foo"
},
{
"id" : 2,
"name" : "bar"
}
],
"stats" : [
{
"peopleId" : 1,
"workHours" : 24
},
{
"peopleId" : 2,
"workHours" : 36
}
]
}
/* 2 */
{
"_id" : 2,
"people" : [
{
"id" : 1,
"name" : "foo"
},
{
"id" : 2,
"name" : "bar"
}
],
"stats" : [
{
"peopleId" : 1,
"workHours" : 25
},
{
"peopleId" : 2,
"workHours" : 36
}
]
}
/* 3 */
{
"_id" : 3,
"people" : [
{
"id" : 1,
"name" : "foo"
},
{
"id" : 2,
"name" : "bar"
}
],
"stats" : [
{
"peopleId" : 1,
"workHours" : 25
},
{
"peopleId" : 2,
"workHours" : 36
}
]
}
Output:-
The output has document id and people id of foo worked more than 24 hours.
/* 1 */
{
"_id" : 3,
"peopleIdMoreThan24" : 1
}
/* 2 */
{
"_id" : 2,
"peopleIdMoreThan24" : 1
}
Given the following dataset:
{ "_id" : 1, "city" : "Yuma", "cat": "roads", "Q1" : 0, "Q2" : 25, "Q3" : 0, "Q4" : 0 }
{ "_id" : 2, "city" : "Reno", "cat": "roads", "Q1" : 30, "Q2" : 0, "Q3" : 0, "Q4" : 60 }
{ "_id" : 3, "city" : "Yuma", "cat": "parks", "Q1" : 0, "Q2" : 0, "Q3" : 45, "Q4" : 0 }
{ "_id" : 4, "city" : "Reno", "cat": "parks", "Q1" : 35, "Q2" : 0, "Q3" : 0, "Q4" : 0 }
{ "_id" : 5, "city" : "Yuma", "cat": "roads", "Q1" : 0, "Q2" : 15, "Q3" : 0, "Q4" : 20 }
I'm trying to achieve the following result. It would be great to just return the totals greater than zero, and also compress each city, cat and Qx total to a single record.
{
"city" : "Yuma",
"cat" : "roads",
"Q2total" : 40
},
{
"city" : "Reno",
"cat" : "roads",
"Q1total" : 30
},
{
"city" : "Reno",
"cat" : "roads",
"Q4total" : 60
},
{
"city" : "Yuma",
"cat" : "parks",
"Q3total" : 45
},
{
"city" : "Reno",
"cat" : "parks",
"Q1total" : 35
},
{
"city" : "Yuma",
"cat" : "roads",
"Q4total" : 20
}
Possible?
We could ask, to what end? Your documents already have a nice consistent Object structure which is recommended. Having objects with varying keys is not a great idea. Data is "data" and should not really be the name of the keys.
With that in mind, the aggregation framework actually follows this sense and does not allow for the generation of arbitrary key names from data contained in the document. But you could get a similar result with the output as data points:
// Pivot the Q1-Q4 columns into (quarter, total) data points, then drop
// the zero totals and flatten for output.
db.junk.aggregate([
// Aggregate first to reduce the pipeline documents somewhat
{ "$group": {
"_id": {
"city": "$city",
"cat": "$cat"
},
"Q1": { "$sum": "$Q1" },
"Q2": { "$sum": "$Q2" },
"Q3": { "$sum": "$Q3" },
"Q4": { "$sum": "$Q4" }
}},
// Convert the "quarter" elements to array entries with the same keys.
{ "$project": {
"totals": {
"$map": {
// $literal stops the "Qx" strings being parsed as field paths.
"input": { "$literal": [ "Q1", "Q2", "Q3", "Q4" ] },
"as": "el",
// Nested $cond chain maps each quarter name to its summed value.
"in": { "$cond": [
{ "$eq": [ "$$el", "Q1" ] },
{ "quarter": "$$el", "total": "$Q1" },
{ "$cond": [
{ "$eq": [ "$$el", "Q2" ] },
{ "quarter": "$$el", "total": "$Q2" },
{ "$cond": [
{ "$eq": [ "$$el", "Q3" ] },
{ "quarter": "$$el", "total": "$Q3" },
{ "quarter": "$$el", "total": "$Q4" }
]}
]}
]}
}
}
}},
// Unwind the array produced
{ "$unwind": "$totals" },
// Filter any "0" results
{ "$match": { "totals.total": { "$ne": 0 } } },
// Maybe project a prettier "flatter" output
{ "$project": {
"_id": 0,
"city": "$_id.city",
"cat": "$_id.cat",
"quarter": "$totals.quarter",
"total": "$totals.total"
}}
])
Which gives you results like this:
{ "city" : "Reno", "cat" : "parks", "quarter" : "Q1", "total" : 35 }
{ "city" : "Yuma", "cat" : "parks", "quarter" : "Q3", "total" : 45 }
{ "city" : "Reno", "cat" : "roads", "quarter" : "Q1", "total" : 30 }
{ "city" : "Reno", "cat" : "roads", "quarter" : "Q4", "total" : 60 }
{ "city" : "Yuma", "cat" : "roads", "quarter" : "Q2", "total" : 40 }
{ "city" : "Yuma", "cat" : "roads", "quarter" : "Q4", "total" : 20 }
You could alternately use mapReduce which allows "some" flexibility with key names. The catch is though that your aggregation is still by "quarter", so you need that as part of the primary key, which cannot be changed once emitted.
Additionally, you cannot "filter" any aggregated results of "0" without a second pass after outputting to a collection, so it's not really of much use for what you want to do, unless you can live with a second mapReduce operation of "transform" query on the output collection.
Worth noting is that if you look at what is being done in the "second" pipeline stage here with $project and $map, you will see that the document structure is essentially being altered to something like what you could alternately have structured your documents like originally, like this:
{
"city" : "Reno",
"cat" : "parks",
"totals" : [
{ "quarter" : "Q1", "total" : 35 },
{ "quarter" : "Q2", "total" : 0 },
{ "quarter" : "Q3", "total" : 0 },
{ "quarter" : "Q4", "total" : 0 }
]
},
{
"city" : "Yuma",
"cat" : "parks",
"totals" : [
{ "quarter" : "Q1", "total" : 0 },
{ "quarter" : "Q2", "total" : 0 },
{ "quarter" : "Q3", "total" : 45 },
{ "quarter" : "Q4", "total" : 0 }
]
}
Then the aggregation operation becomes simple for your documents to the same results as shown above:
// Pre-structured documents make the same report a plain unwind/group.
db.collection.aggregate([
  { "$unwind": "$totals" },
  { "$group": {
    "_id": {
      "city": "$city",
      "cat": "$cat",
      "quarter": "$totals.quarter"
    },
    "ttotal": { "$sum": "$totals.total" }
  }},
  // FIX: this stage was missing its closing brace, which made the
  // whole pipeline a syntax error.
  { "$match": { "ttotal": { "$ne": 0 } } },
  { "$project": {
    "_id": 0,
    "city": "$_id.city",
    "cat": "$_id.cat",
    "quarter": "$_id.quarter",
    "total": "$ttotal"
  }}
])
So it might make more sense to consider structuring your documents in that way to begin with and avoid any overhead required by the document transformation.
I think you'll find that consistent key names makes a far better object model to program to, where you should be reading the data point from the key-value and not the key-name. If you really need to, then it's a simple matter of reading the data from the object and transforming the keys of each already aggregated result in post processing.