Mongodb Aggregated documents into a nested hierarchy - mongodb

I am trying to aggregate documents in a collection to a nested graph. Below is the sample data from comments store application I am working with -
Posts Collection:
{
"_id" : "1",
"text" : "hey",
}
{
"_id" : "2",
"text" : "hello",
"replyTo" : "1"
}
{
"_id" : "3",
"text" : "What's up?",
"replyTo" : "2"
}
{
"_id" : "4",
"text" : "How are you",
"replyTo" : "1"
}
{
"_id" : "5",
"text" : "Knock, knock!",
}
The result I am expecting should look like :
{
"_id": "1",
"text": "hey",
"replies": [
{
"_id": "2",
"text": "hello",
"replyTo": "1",
"replies": [
{
"_id": "3",
"text": "What's up?",
"replyTo": "2"
}
]
},
{
"_id": "4",
"text": "How are you",
"replyTo": "1"
}
]
}
{
"_id" : "5",
"text" : "Knock, knock!",
}
I tried $graphLookup, which processes documents recursively, but the result is a flat array, which is not what I expect:
db.posts.aggregate(
[
{
"$graphLookup" : {
"from" : "posts",
"startWith" : "$_id",
"connectFromField" : "_id",
"connectToField" : "replyTo",
"as" : "replies"
}
}
],
{
"allowDiskUse" : false
}
);
Result:
{
"_id" : "1",
"text" : "hey",
"slug_path" : "1",
"replies" : [
{
"_id" : "2",
"text" : "hello",
"slug_path" : "1/2",
"replyTo" : "1"
},
{
"_id" : "4",
"text" : "How are you",
"slug_path" : "1/4",
"replyTo" : "1"
},
{
"_id" : "3",
"text" : "what's up?",
"slug_path" : "1/2/3",
"replyTo" : "2"
}
]
}
{
"_id" : "2",
"text" : "hello",
"slug_path" : "1/2",
"replyTo" : "1",
"replies" : [
{
"_id" : "3",
"text" : "what's up?",
"slug_path" : "1/2/3",
"replyTo" : "2"
}
]
}
{
"_id" : "4",
"text" : "How are you",
"slug_path" : "1/4",
"replyTo" : "1",
"replies" : [
]
}
{
"_id" : "3",
"text" : "what's up?",
"slug_path" : "1/2/3",
"replyTo" : "2",
"replies" : [
]
}
{
"_id" : "5",
"text" : "Knock, knock!",
"replies" : [
]
}
Is there a way I can achieve the type of aggregation I am expecting?
Thanks
Davinder

I ended up ditching $graphLookup because, although it traverses the parent-child relationships recursively, it returns all descendants as a single flat array and can't build a nested tree-like structure in its results. I was able to create the tree-like structure entirely in Java, and the program performs well.

Related

mongodb $lookup return empty array

I'm new to MongoDB. In this question I have two collections: one is selected_date and the other is global_mobility_report. I'm trying to find the entries in global_mobility_report whose date is in selected_date, so I use $lookup to join the two collections.
date_selected:
{
"_id" : ObjectId("5f60d81ba43174cf172ebfdc"),
"date" : ISODate("2020-05-22T00:00:00.000+08:00")
},
{
"_id" : ObjectId("5f60d81ba43174cf172ebfdd"),
"date" : ISODate("2020-05-23T00:00:00.000+08:00")
},
{
"_id" : ObjectId("5f60d81ba43174cf172ebfde"),
"date" : ISODate("2020-05-24T00:00:00.000+08:00")
},
{
"_id" : ObjectId("5f60d81ba43174cf172ebfdf"),
"date" : ISODate("2020-05-25T00:00:00.000+08:00")
},
{
"_id" : ObjectId("5f60d81ba43174cf172ebfe0"),
"date" : ISODate("2020-05-26T00:00:00.000+08:00")
},
{
"_id" : ObjectId("5f60d81ba43174cf172ebfe1"),
"date" : ISODate("2020-05-27T00:00:00.000+08:00")
}
global_mobility_report:
{
"_id" : ObjectId("5f49fb013acddb5eec37f99e"),
"country_region_code" : "AE",
"country_region" : "United Arab Emirates",
"sub_region_1" : "",
"sub_region_2" : "",
"metro_area" : "",
"iso_3166_2_code" : "",
"census_fips_code" : "",
"date" : "2020-02-15",
"retail_and_recreation_percent_change_from_baseline" : "0",
"grocery_and_pharmacy_percent_change_from_baseline" : "4",
"parks_percent_change_from_baseline" : "5",
"transit_stations_percent_change_from_baseline" : "0",
"workplaces_percent_change_from_baseline" : "2",
"residential_percent_change_from_baseline" : "1"
},
{
"_id" : ObjectId("5f49fb013acddb5eec37f99f"),
"country_region_code" : "AE",
"country_region" : "United Arab Emirates",
"sub_region_1" : "",
"sub_region_2" : "",
"metro_area" : "",
"iso_3166_2_code" : "",
"census_fips_code" : "",
"date" : "2020-02-16",
"retail_and_recreation_percent_change_from_baseline" : "1",
"grocery_and_pharmacy_percent_change_from_baseline" : "4",
"parks_percent_change_from_baseline" : "4",
"transit_stations_percent_change_from_baseline" : "1",
"workplaces_percent_change_from_baseline" : "2",
"residential_percent_change_from_baseline" : "1"
},
{
"_id" : ObjectId("5f49fb013acddb5eec37f9a0"),
"country_region_code" : "AE",
"country_region" : "United Arab Emirates",
"sub_region_1" : "",
"sub_region_2" : "",
"metro_area" : "",
"iso_3166_2_code" : "",
"census_fips_code" : "",
"date" : "2020-02-17",
"retail_and_recreation_percent_change_from_baseline" : "-1",
"grocery_and_pharmacy_percent_change_from_baseline" : "1",
"parks_percent_change_from_baseline" : "5",
"transit_stations_percent_change_from_baseline" : "1",
"workplaces_percent_change_from_baseline" : "2",
"residential_percent_change_from_baseline" : "1"
},
{
"_id" : ObjectId("5f49fb013acddb5eec37f9a1"),
"country_region_code" : "AE",
"country_region" : "United Arab Emirates",
"sub_region_1" : "",
"sub_region_2" : "",
"metro_area" : "",
"iso_3166_2_code" : "",
"census_fips_code" : "",
"date" : "2020-02-18",
"retail_and_recreation_percent_change_from_baseline" : "-2",
"grocery_and_pharmacy_percent_change_from_baseline" : "1",
"parks_percent_change_from_baseline" : "5",
"transit_stations_percent_change_from_baseline" : "0",
"workplaces_percent_change_from_baseline" : "2",
"residential_percent_change_from_baseline" : "1"
}
When I try to find all entries in global_mobility_report whose 'date' matches a date in selected_date (I have converted the string to date format in global_mobility_report), it returns an empty array.
db.global_mobility_report.aggregate([
{$match:{country_region:"Indonesia"}},
{$addFields: {"dateconverted": {$convert: { input: "$date", to: "date", onError:"onErrorExpr", onNull:"onNullExpr"}}}},
{
$lookup:
{
from: "selected_date",
localField:"dateconverted",
foreignField: "date",
as: "selected_dates" // empty
}
}])
The output is:
{
"_id" : ObjectId("5f49fd6a3acddb5eec4427bb"),
"country_region_code" : "ID",
"country_region" : "Indonesia",
"sub_region_1" : "",
"sub_region_2" : "",
"metro_area" : "",
"iso_3166_2_code" : "",
"census_fips_code" : "",
"date" : "2020-02-15",
"retail_and_recreation_percent_change_from_baseline" : "-2",
"grocery_and_pharmacy_percent_change_from_baseline" : "-2",
"parks_percent_change_from_baseline" : "-8",
"transit_stations_percent_change_from_baseline" : "1",
"workplaces_percent_change_from_baseline" : "5",
"residential_percent_change_from_baseline" : "1",
"dateconverted" : ISODate("2020-02-15T08:00:00.000+08:00"),
"selected_dates" : [ ]
},
{
"_id" : ObjectId("5f49fd6a3acddb5eec4427bc"),
"country_region_code" : "ID",
"country_region" : "Indonesia",
"sub_region_1" : "",
"sub_region_2" : "",
"metro_area" : "",
"iso_3166_2_code" : "",
"census_fips_code" : "",
"date" : "2020-02-16",
"retail_and_recreation_percent_change_from_baseline" : "-3",
"grocery_and_pharmacy_percent_change_from_baseline" : "-3",
"parks_percent_change_from_baseline" : "-7",
"transit_stations_percent_change_from_baseline" : "-4",
"workplaces_percent_change_from_baseline" : "2",
"residential_percent_change_from_baseline" : "2",
"dateconverted" : ISODate("2020-02-16T08:00:00.000+08:00"),
"selected_dates" : [ ]
}
The reason you are getting an empty array is because dateconverted does not match the date field.
The $lookup operator performs an equality match between the localField and the foreignField. Let's illustrate this with an example:
db.users.insertMany([
{ email: "test#example.com", userId: 0 },
{ email: "test2#example.com", userId: 1 },
{ email: "test3#example.com", userId: 2 },
{ email: "test3#example.com", userId: 3 }
]);
db.posts.insertMany([
{ by: 0, post: "hello world" },
{ by: 0 , post: "hello earthlings" },
{ by: 3, post: "test test test"}
]);
db.posts.aggregate([
{
$lookup: {
from: "users",
localField: "by",
foreignField: "userId",
as: "list_of_post"
}
}
]).toArray();
The output will be what it is supposed to be, because the localField matched the foreignField:
[
{
"_id" : ObjectId("5f60f6859a6df3133b325eb0"),
"by" : 0,
"post" : "hello world",
"list_of_post" : [
{
"_id" : ObjectId("5f60f6849a6df3133b325eac"),
"email" : "test#example.com",
"userId" : 0
}
]
},
{
"_id" : ObjectId("5f60f6859a6df3133b325eb1"),
"by" : 0,
"post" : "hello earthlings",
"list_of_post" : [
{
"_id" : ObjectId("5f60f6849a6df3133b325eac"),
"email" : "test#example.com",
"userId" : 0
}
]
},
{
"_id" : ObjectId("5f60f6859a6df3133b325eb2"),
"by" : 3,
"post" : "test test test",
"list_of_post" : [
{
"_id" : ObjectId("5f60f6849a6df3133b325eaf"),
"email" : "test3#example.com",
"userId" : 3
}
]
}
]
Let's mimic a situation where it does not match
db.posts.drop();
db.posts.insertMany([
{ by: 20, post: "hello world" },
{ by: 23 , post: "hello earthlings" },
{ by: 50, post: "test test test"}
]);
We get an empty array
[
{
"_id" : ObjectId("5f60f83344304796ae700b4d"),
"by" : 20,
"post" : "hello world",
"list_of_post" : [ ]
},
{
"_id" : ObjectId("5f60f83344304796ae700b4e"),
"by" : 23,
"post" : "hello earthlings",
"list_of_post" : [ ]
},
{
"_id" : ObjectId("5f60f83344304796ae700b4f"),
"by" : 50,
"post" : "test test test",
"list_of_post" : [ ]
}
]
So, back to your question, the reason for the empty array is as a result of the dateconverted field not matching the date field. So, let's take a look at an example.
In the second document of your output, dateconverted is
ISODate("2020-02-16T08:00:00.000+08:00"), and looking at the date_selected documents, there is no field that corresponds to this value, ISODate("2020-02-16T08:00:00.000+08:00"). But let's manually insert it, so you will properly understand what I am talking about.
db.date_selected.insert({
"_id" : ObjectId(),
"date": ISODate("2020-02-16T08:00:00.000+08:00")
});
Running the aggregation pipeline will also make selected_dates an empty array. The other thing you have to note is that the date part (mm/dd/yyyy) of the ISODate values does not match any document in your question either. Secondly, you have to devise another means of running the comparison, because the conversion in the $addFields stage will be affected by timezone and other issues as well.

Find a nested object field inside an array in mongodb aggregate

I have this object as below.
{
"_id" : ObjectId("5ec80a981e89a84b19934039"),
"status" : "active",
"organizationId" : "1",
"productId" : "1947",
"name" : "BOOKEND & PAPER WEIGHT SET – ZODIAC PIG – RED COPPER + PLATINUM",
"description" : "This global exclusive Zodiac bookend and paperweight set from Zuny will stand auspiciously on your bookcase and table, spreading good luck and fortune throughout your home just in time for the Year of the Pig.",
"brand" : "ZUNY",
"created" : "2018-09-28 00:00:00",
"updated" : "2020-05-22 09:19:07",
"mainImage" : "https://",
"availableOnline" : true,
"colors" : [
{
"images" : [
{
"type" : "studio",
"url" : "https://"
},
{
"type" : "studio",
"url" : "https://"
},
{
"type" : "studio",
"url" : "https://"
}
],
"extraInfo" : [
{
"type" : "text-tag",
"title" : "CATEGORY",
"tags" : [
"HOME FURNISHING & DÉCOR",
"LIFESTYLE"
]
},
{
"type" : "text-tag",
"title" : "BRAND",
"tags" : [
"ZUNY"
]
},
{
"type" : "text-tag",
"title" : "COLOUR",
"tags" : [
"GOLD",
"ROSE GOLD"
]
},
{
"type" : "text-tag",
"title" : "SEASON",
"tags" : [
"AW(2018)"
]
},
{
"type" : "text-tag",
"title" : "HASHTAG",
"tags" : [
"BOOKCASES",
"BOOKEND",
"COLOUR",
"EXCLUSIVE",
"GLOBAL EXCLUSIVE",
"HOME",
"LEATHER",
"MOTIF",
"OBJECTS",
"PAPER",
"PAPERWEIGHT",
"PLATINUM",
"SET",
"SYNTHETIC",
"ZODIAC",
"HANDMADE",
"time"
]
}
],
"_id" : ObjectId("5ec80a981e89a84b1993403a"),
"colorId" : "1",
"color" : "ROSE GOLD",
"status" : "active",
"sizes" : [
{
"extraInfo" : [
{
"type" : "text-block",
"title" : "Size And Fit",
"text" : ""
},
{
"type" : "text-block",
"title" : "Information",
"text" : "Global exclusive. Colour: Copper/Platinum. Set includes: Zodiac Pig bookend (x 1), Zodiac Pig paperweight (x 1). Metallic copper- and platinum-tone synthetic leather. Pig motif. Iron pellet filling. Handmade"
}
],
"_id" : ObjectId("5ec80a981e89a84b1993403b"),
"sizeId" : "1",
"neo" : "0210111790664",
"size" : "*",
"originalPrice" : "1060.00",
"sellingPrice" : "1060.00",
"discountPercent" : "0.00",
"url" : "https://",
"status" : "active",
"currency" : "HK$",
"stores" : [
{
"storeId" : "1",
"quantity" : 70,
"_id" : ObjectId("5ec80a981e89a84b1993403c"),
"available" : 70,
"reserved" : 0,
"name" : "Park Street",
"status" : "active"
},
{
"storeId" : "2",
"quantity" : 95,
"_id" : ObjectId("5ec80a981e89a84b1993403d"),
"name" : "Rashbehari",
"status" : "active"
}
]
}
]
}
],
"__v" : 0
}
I want the output as follows
{
"name": "Mock Collection",
"collectionId": "92",
"products": [
{
"title": "GLOBAL EXCLUSIVE OFF-SHOULDER SHIRT DRESS",
"imageUrl": "https://",
"productId": "21174",
"currency": "" // This should be this.colors[0].sizes[0].currency
},
]
}
How do I get the nested field? I tried using $arrayElemAt, with which I was able to get to colors[0], but I am confused about how to get inside the nested sizes object from there. Also, the currency node should hold the exact value. Currently it comes out as currency: {currency: value}, which I don't want.
Please help!
I'm not sure how you got that output, but to extract currency from the first object of sizes you can try this:
db.collection.aggregate([
{
$project: {
currency: {
$arrayElemAt: [
{
$arrayElemAt: [ "$colors.sizes.currency", 0 ] // gives an array of currency values, in your case since you've only one object just an array of one value
},
0
]
}
}
}
])
Test : mongoplayground

All Mongo documents with duplicated objects inside array

Find documents with duplicated objects inside an array.
Some answers work only with arrays made of "basic type elements" (i.e. arrays of strings). Here I want to filter on certain object fields.
For example:
{
"name": "1",
"arr": [{ "type": "fruit", "name":"pear"},{ "type": "fruit","name":"banana"}]
},
{
"name":"2",
"arr": [{"type":"fish"}]
}
Given the above two documents, I want to retrieve just document 1, because it has 2 elements in the array that have the same type. (Of course I want all documents with such property, not just one)
The following query can get us the expected output:
db.collection.find({
$expr:{
$ne:[
{
$size:"$arr"
},
{
$size:{
$setUnion:["$arr.type"]
}
}
]
}
}).pretty()
Data set:
{
"_id" : ObjectId("5d7b8546d76ccfa3cb0f133c"),
"name" : "1",
"arr" : [
{
"type" : "fruit",
"name" : "pear"
},
{
"type" : "fruit",
"name" : "banana"
}
]
}
{
"_id" : ObjectId("5d7b8546d76ccfa3cb0f133d"),
"name" : "2",
"arr" : [
{
"type" : "fish"
}
]
}
{
"_id" : ObjectId("5d7b8546d76ccfa3cb0f133e"),
"name" : "3",
"arr" : [
{
"type" : "product",
"name" : "watch"
},
{
"type" : "product",
"name" : "Pen"
}
]
}
Output:
{
"_id" : ObjectId("5d7b8546d76ccfa3cb0f133c"),
"name" : "1",
"arr" : [
{
"type" : "fruit",
"name" : "pear"
},
{
"type" : "fruit",
"name" : "banana"
}
]
}
{
"_id" : ObjectId("5d7b8546d76ccfa3cb0f133e"),
"name" : "3",
"arr" : [
{
"type" : "product",
"name" : "watch"
},
{
"type" : "product",
"name" : "Pen"
}
]
}
Query analysis: We keep the documents in which the size of arr is not equal to the number of unique type values present in arr, i.e. the documents where at least two array elements share the same type.

How to merge two matching objects from different array into one object?

I have a situation where I have got one result from aggregation where I am getting data in this format.
{
"_id" : ObjectId("5a42432d69cbfed9a410e8ad"),
"bacId" : "BAC0023444",
"cardId" : "2",
"defaultCardOrder" : "2",
"alias" : "Finance",
"label" : "Finance",
"for" : "",
"cardTooltip" : {
"enable" : true,
"text" : ""
},
"dataBlocks" : [
{
"defaultBlockOrder" : "1",
"blockId" : "1",
"data" : "0"
},
{
"defaultBlockOrder" : "2",
"blockId" : "2",
"data" : "0"
},
{
"defaultBlockOrder" : "3",
"blockId" : "3",
"data" : "0"
}
],
"templateBlocks" : [
{
"blockId" : "1",
"label" : "Gross Profit",
"quarter" : "",
"data" : "",
"dataType" : {
"typeId" : "2"
},
"tooltip" : {
"enable" : true,
"text" : ""
}
},
{
"blockId" : "2",
"label" : "Profit Forecast",
"quarter" : "",
"data" : "",
"dataType" : {
"typeId" : "2"
},
"tooltip" : {
"enable" : true,
"text" : ""
}
},
{
"blockId" : "3",
"label" : "Resource Billing",
"quarter" : "",
"data" : "",
"dataType" : {
"typeId" : "2"
},
"tooltip" : {
"enable" : true,
"text" : ""
}
}
]
},
{
"_id" : ObjectId("5a42432d69cbfed9a410e8ad"),
"bacId" : "BAC0023444",
"cardId" : "3",
"defaultCardOrder" : "3",
"alias" : "Staffing",
"label" : "Staffing",
"for" : "",
"cardTooltip" : {
"enable" : true,
"text" : ""
},
"dataBlocks" : [
{
"defaultBlockOrder" : "1",
"blockId" : "1",
"data" : "1212"
},
{
"defaultBlockOrder" : "2",
"blockId" : "2",
"data" : "1120"
},
{
"defaultBlockOrder" : "3",
"blockId" : "3",
"data" : "1200"
}
],
"templateBlocks" : [
{
"blockId" : "1",
"label" : "Staffing Planner",
"quarter" : "",
"data" : "",
"dataType" : {
"typeId" : "1"
},
"tooltip" : {
"enable" : true,
"text" : ""
}
},
{
"blockId" : "2",
"label" : "Baseline",
"quarter" : "",
"data" : "",
"dataType" : {
"typeId" : "1"
},
"tooltip" : {
"enable" : true,
"text" : ""
}
},
{
"blockId" : "3",
"label" : "Projected",
"quarter" : "",
"data" : "",
"dataType" : {
"typeId" : "1"
},
"tooltip" : {
"enable" : true,
"text" : ""
}
}
]
}
Now I want to compare the two arrays of objects in each document (in this case "dataBlocks" and "templateBlocks") based on their "blockId" values, and I want to get the result in the following format.
{
"_id" : ObjectId("5a42432d69cbfed9a410e8ad"),
"bacId" : "BAC0023444",
"cardId" : "2",
"defaultCardOrder" : "2",
"alias" : "Finance",
"label" : "Finance",
"for" : "",
"cardTooltip" : {
"enable" : true,
"text" : ""
},
"blocks" : [
{
"defaultBlockOrder" : "1",
"blockId" : "1",
"data" : "0",
"label" : "Gross Profit",
"quarter" : "",
"dataType" : {
"typeId" : "2"
},
"tooltip" : {
"enable" : true,
"text" : ""
}
},
{
"defaultBlockOrder" : "2",
"blockId" : "2",
"data" : "0",
"label" : "Profit Forecast",
"quarter" : "",
"dataType" : {
"typeId" : "2"
},
"tooltip" : {
"enable" : true,
"text" : ""
}
},
{
"defaultBlockOrder" : "3",
"blockId" : "3",
"data" : "0",
"label" : "Resource Billing",
"quarter" : "",
"dataType" : {
"typeId" : "2"
},
"tooltip" : {
"enable" : true,
"text" : ""
}
}
]
},
{
"_id" : ObjectId("5a42432d69cbfed9a410e8ad"),
"bacId" : "BAC0023444",
"cardId" : "3",
"defaultCardOrder" : "3",
"alias" : "Staffing",
"label" : "Staffing",
"for" : "",
"cardTooltip" : {
"enable" : true,
"text" : ""
},
"dataBlocks" : [
{
"defaultBlockOrder" : "1",
"blockId" : "1",
"data" : "1212",
"label" : "Staffing Planner",
"quarter" : "",
"dataType" : {
"typeId" : "1"
},
"tooltip" : {
"enable" : true,
"text" : ""
}
},
{
"defaultBlockOrder" : "2",
"blockId" : "2",
"data" : "1120",
"label" : "Baseline",
"quarter" : "",
"dataType" : {
"typeId" : "1"
},
"tooltip" : {
"enable" : true,
"text" : ""
}
},
{
"defaultBlockOrder" : "3",
"blockId" : "3",
"data" : "1200",
"label" : "Projected",
"quarter" : "",
"dataType" : {
"typeId" : "1"
},
"tooltip" : {
"enable" : true,
"text" : ""
}
}
]
}
Is it possible to get it done with mongodb ? I am using 3.4 and trying to achieve this using aggregation.
Thanks in advance.
You can try below aggregation in 3.6.
The query below iterates over the dataBlocks array and merges each data block element with the matching template block element. The template block is looked up using $indexOfArray, which locates the array index with the matching block id, and $arrayElemAt, which accesses the element at the found index.
db.collection_name.aggregate([{"$addFields":{
"blocks":{
"$map":{
"input":"$dataBlocks",
"in":{
"$mergeObjects":[
"$$this",
{"$arrayElemAt":[
"$templateBlocks",
{"$indexOfArray":["$templateBlocks.blockId","$$this.blockId"]}
]
}
]
}
}
}
}}])
For 3.4, replace $mergeObjects with a combination of $arrayToObject, $objectToArray and $concatArrays to merge each array element from both arrays.
db.collection_name.aggregate([{"$addFields":{
"blocks":{
"$map":{
"input":"$dataBlocks",
"in":{
"$arrayToObject":{
"$concatArrays":[
{"$objectToArray":"$$this"},
{"$objectToArray":{
"$arrayElemAt":[
"$templateBlocks",
{"$indexOfArray":["$templateBlocks.blockId","$$this.blockId"]
}
]
}}
]
}
}
}
}
}}])
You can use project with exclusion as last stage to remove array fields from output.
{"$project":{"templateBlocks":0,"dataBlocks":0}}
The following query does the job:
db.merge.aggregate([
// unwind twice
{$unwind: "$templateBlocks"},
{$unwind: "$dataBlocks"},
// get rid of documents where dataBlocks.blockId and
// templateBlocks.blockId are not equal
{$redact: {$cond: [{
$eq: [
"$dataBlocks.blockId",
"$templateBlocks.blockId"
]
},
"$$KEEP",
"$$PRUNE"
]
}
},
// merge dataBlocks and templateBlocks into a single document
{$project: {
bacId: 1,
cardId: 1,
defaultCardOrder: 1,
alias: 1,
label: 1,
for: 1,
cardTooltip: 1,
dataBlocks: {
defaultBlockOrder: "$dataBlocks.defaultBlockOrder",
blockId: "$dataBlocks.blockId",
data: "$dataBlocks.data",
label: "$templateBlocks.label",
quarter: "$templateBlocks.quarter",
data: "$templateBlocks.data",
dataType: "$templateBlocks.dataType",
tooltip: "$templateBlocks.tooltip"
}
}
},
// group to put correspondent dataBlocks to an array
{$group: {
_id: {
_id: "$_id",
bacId: "$bacId",
cardId: "$cardId",
defaultCardOrder: "$defaultCardOrder",
alias: "$alias",
label: "$label",
for: "$for",
cardTooltip: "$cardTooltip"
},
dataBlocks: {$push: "$dataBlocks" }
}
},
// remove the unnecessary _id object
{$project: {
_id: "$_id._id",
bacId: "$_id.bacId",
cardId: "$_id.cardId",
defaultCardOrder: "$_id.defaultCardOrder",
alias: "$_id.alias",
label: "$_id.label",
for: "$_id.for",
cardTooltip: "$_id.cardTooltip",
dataBlocks: "$dataBlocks"
}
}
])
Take into account that performance depends on the size of your data set, as the query unwinds twice and may produce a significant number of intermediate documents.

Group multi-dimensional array after unwinding elements

Again with mongoDB. I really like aggregation, but still can't "get it".
So here is my array:
{
"_id" : ObjectId("55951b2bf41edfc80b00002a"),
"orders" : [
{
"id" : "55929142f41edfdc0f00002f",
"name" : "XYZ",
"id_basket" : 1,
"card" : [
{
"id" : "250",
"serial" : "B",
"type" : "9cf4161002b9eda349bb9c5ae64b9f4a",
"name" : "Eco",
"ticket" : [
{
"id" : "55927d41f41edfd00f000030",
"name" : "ZZZ",
"price" : {
"name" : "Normal",
"price" : "10",
"price_disp" : "10 €",
}
},
{
"id" : "55927d41f41edfd00f000030",
"name" : "ZZZ",
"price" : {
"name" : "Normal",
"price" : "10",
"price_disp" : "10 €",
}
}
]
},
{
"id" : "250",
"serial" : "B",
"type" : "9cf4161002b9eda349bb9c5ae64b9f4a",
"name" : "Eco",
"ticket" : [
{
"id" : "55927d41f41edfd00f000030",
"name" : "ZZZ",
"price" : {
"name" : "Normal",
"price" : "10",
"price_disp" : "10 €",
}
},
{
"id" : "55927d41f41edfd00f000030",
"name" : "ZZZ",
"price" : {
"name" : "Normal",
"price" : "10",
"price_disp" : "10 €",
}
}
]
}
],
"full_amount" : "40",
},
{
"id" : "55929142f41edfdc0f00002f",
"name" : "XYZ",
"id_basket" : 1,
"card" : [
{
"id" : "250",
"serial" : "B",
"type" : "9cf4161002b9eda349bb9c5ae64b9f4a",
"name" : "Eco",
"ticket" : [
{
"id" : "55927d41f41edfd00f000030",
"name" : "ZZZ",
"price" : {
"name" : "Normal",
"price" : "10",
"price_disp" : "10 €",
}
},
{
"id" : "55927d41f41edfd00f000030",
"name" : "ZZZ",
"price" : {
"name" : "Normal",
"price" : "10",
"price_disp" : "10 €",
}
}
]
},
{
"id" : "250",
"serial" : "B",
"type" : "9cf4161002b9eda349bb9c5ae64b9f4a",
"name" : "Eco",
"ticket" : [
{
"id" : "55927d41f41edfd00f000030",
"name" : "ZZZ",
"price" : {
"name" : "Normal",
"price" : "10",
"price_disp" : "10 €",
}
},
{
"id" : "55927d41f41edfd00f000030",
"name" : "ZZZ",
"price" : {
"name" : "Normal",
"price" : "10",
"price_disp" : "10 €",
}
}
]
}
],
"full_amount" : "40",
},
],
"rate" : "0.23",
"date" : "2015-07-02 13:04:34",
"id_user" : 97,
}
I want to output something like this:
{
"_id" : ObjectId("55951b2bf41edfc80b00002a"),
"orders" : [
{
"id" : "55929142f41edfdc0f00002f",
"name" : "XYZ",
"card" : [
{
"id" : "250",
"serial" : "B",
"name" : "Eco",
"ticket" : [
{
"id" : "55927d41f41edfd00f000030",
"name" : "ZZZ",
"price" : "10 €"
},
{
"id" : "55927d41f41edfd00f000030",
"name" : "ZZZ",
"price" : "10 €"
}
]
},
{
"id" : "250",
"serial" : "B",
"name" : "Eco",
"ticket" : [
{
"id" : "55927d41f41edfd00f000030",
"name" : "ZZZ",
"price" : "10 €"
},
{
"id" : "55927d41f41edfd00f000030",
"name" : "ZZZ",
"price" : "10 €"
}
]
}
],
"full_amount" : "40",
},
{
"id" : "55929142f41edfdc0f00002f",
"name" : "XYZ",
"card" : [
{
"id" : "250",
"serial" : "B",
"name" : "Eco",
"ticket" : [
{
"id" : "55927d41f41edfd00f000030",
"name" : "ZZZ",
"price" : "10 €"
},
{
"id" : "55927d41f41edfd00f000030",
"name" : "ZZZ",
"price" : "10 €"
}
]
},
{
"id" : "250",
"serial" : "B",
"name" : "Eco",
"ticket" : [
{
"id" : "55927d41f41edfd00f000030",
"name" : "ZZZ",
"price" : "10 €"
},
{
"id" : "55927d41f41edfd00f000030",
"name" : "ZZZ",
"price" : "10 €"
}
]
}
],
"full_amount" : "40",
},
],
"rate" : "0.23",
"date" : "2015-07-02 13:04:34",
}
I've tried many combinations with unwinding, projecting and grouping and failed to get what I want. Can someone help me with this?
You probably shouldn't be using the aggregation framework for tasks like this that do not actually "aggregate" anything between documents. This really is a "projection" task since all you are asking is to "alter" the structure of a document, and that is a task probably better suited to coding in the client after the document is retrieved.
A very good reason for this is that operations like $unwind are very costly in terms of performance. What $unwind does is produce a "copy" of the document content for each array member present, which results in a lot more documents to process.
Think of that like a "SQL Join" with a "one to many" relationship, the only difference being the data is self contained in one document. Processing $unwind simulates the "join" results in that the "master" (one) document contents are reproduced for every "child" (many) document.
In order to counter such operations being done by people, MongoDB 2.6 introduced the $map operator, which processes array elements within the document itself.
So instead of doing multiple ( or any ) $unwind actions, you can instead just process the arrays within the document itself using $map in a $project stage:
db.collection.aggregate([
{ "$project": {
"orders": { "$map": {
"input": "$orders",
"as": "o",
"in": {
"id": "$$o.id",
"name": "$$o.name",
"card": { "$map": {
"input": "$$o.card",
"as": "c",
"in": {
"id": "$$c.id",
"serial": "$$c.serial",
"name": "$$c.name",
"ticket": { "$map": {
"input": "$$c.ticket",
"as": "t",
"in": {
"id": "$$t.id",
"name": "$$t.name",
"price": "$$t.price.price_disp"
}
}}
}
}},
"full_amount": "$$o.full_amount"
}
}},
"rate": 1,
"date": 1
}}
])
The operations are fairly simple there, as each "array" is assigned its own variable name, and for a simple projection operation such as this all that is really left is selecting which fields you want.
In earlier versions, processing using $unwind is much more difficult:
db.collection.aggregate([
{ "$unwind": "$orders" },
{ "$unwind": "$orders.card" },
{ "$unwind": "$orders.card.ticket" },
{ "$group": {
"_id": {
"_id": "$_id",
"orders": {
"id": "$orders.id",
"name": "$orders.name",
"card": {
"id": "$orders.card.id",
"serial": "$orders.card.serial",
"name": "$orders.card.name"
},
"full_amount": "$orders.full_amount"
},
"rate": "$rate",
"date": "$date"
},
"ticket": {
"$push": {
"id": "$orders.card.ticket.id",
"name": "$orders.card.ticket.name",
"price": "$orders.card.ticket.price.price_disp"
}
}
}},
{ "$group": {
"_id": {
"_id": "$_id._id",
"orders": {
"id": "$_id.orders.id",
"name": "$_id.orders.name",
"full_amount": "$_id.orders.full_amount"
},
"rate": "$_id.rate",
"date": "$_id.date"
},
"card": {
"$push": {
"id": "$_id.orders.card.id",
"serial": "$_id.orders.card.serial",
"name": "$_id.orders.card.name",
"ticket": "$ticket"
}
}
}},
{ "$group": {
"_id": "$_id._id",
"orders": {
"$push": {
"id": "$_id.orders.id",
"name": "$_id.orders.name",
"card": "$card",
"full_amount": "$_id.orders.full_amount"
}
},
"rate": { "$first": "$_id.rate" },
"date": { "$first": "$_id.date" }
}}
])
So following through that carefully, you should see that since you $unwind three times it is necessary to $group "three times" as well, while carefully grouping all the distinct values at each "level" and re-constructing the arrays via $push.
This really is not advised at all as was mentioned earlier:
You "are not grouping/aggregating anything" and each sub-document "must" contain a "unique" identifier because of the "grouping" operations required to re-construct arrays. ( See: NOTE )
The $unwind operation here is very costly. All of the document information is re-produced by a factor of "n" array X "n" array elements and so on. So there is much more data in the aggregation pipeline than your collection or query selection actually contains in itself.
Therefore in conclusion, for the general processing of "reformatting your data" you should instead be processing each document in your code rather than be "throwing it" at the aggregation pipeline to do.
If your document data requires "sufficient" manipulation that makes a "substantial difference" to the returned result size that you deem to be more efficient than pulling the whole document and manipulating in the client, then and "only" then should you be using the $project form as shown with the $map operations.
Sidebar
Your original "tag" here mentions "PHP".
All MongoDB queries including the aggregation have nothing language specific about them and are just "data structures" and are represented as such mostly in the "native form" for those languages (PHP,JavaScript,python,etc), and with "builder methods" for those languages without "native" expressive formats for free structures ( C,C#,Java ).
In all cases, there are simple parsers available for JSON, which is a common "lingua franca" here as the MongoDB Shell itself is JavaScript based and understands JSON structure ( as actual JavaScript Objects ) natively.
So when working with such examples use tools like:
json_decode: to get more of an insight into how your native data structure is constructed.
json_encode: in order to check your native data structure against any JSON represented sample.
All content here is just simple "key/value" array() notation, though nested. But it is probably good practice to be aware of the tools and use them regularly.
NOTE:
The data sample you give looks very much like you have "cut and paste" data in order to create multiple items, as various "sub-items" all share the same "id" values.
Your "real" data should not do this! So I hope it does not, but if so then fix it.
In order to make the second example workable ( first is perfectly fine as is ) the data needs to be altered to include "unique" "id" values for each sub-element.
As I used here:
{
"_id" : ObjectId("55951b2bf41edfc80b00002a"),
"orders" : [
{
"id" : "55929142f41edfdc0f00002a",
"name" : "XYZ",
"card" : [
{
"id" : "250",
"serial" : "B",
"name" : "Eco",
"ticket" : [
{
"id" : "55927d41f41edfd00f000031",
"name" : "ZZZ",
"price" : "10 €"
},
{
"id" : "55927d41f41edfd00f000032",
"name" : "ZZZ",
"price" : "10 €"
}
]
},
{
"id" : "251",
"serial" : "B",
"name" : "Eco",
"ticket" : [
{
"id" : "55927d41f41edfd00f000033",
"name" : "ZZZ",
"price" : "10 €"
},
{
"id" : "55927d41f41edfd00f000034",
"name" : "ZZZ",
"price" : "10 €"
}
]
}
],
"full_amount" : "40",
},
{
"id" : "55929142f41edfdc0f00002b",
"name" : "XYZ",
"card" : [
{
"id" : "252",
"serial" : "B",
"name" : "Eco",
"ticket" : [
{
"id" : "55927d41f41edfd00f000035",
"name" : "ZZZ",
"price" : "10 €"
},
{
"id" : "55927d41f41edfd00f000036",
"name" : "ZZZ",
"price" : "10 €"
}
]
},
{
"id" : "253",
"serial" : "B",
"name" : "Eco",
"ticket" : [
{
"id" : "55927d41f41edfd00f000037",
"name" : "ZZZ",
"price" : "10 €"
},
{
"id" : "55927d41f41edfd00f000038",
"name" : "ZZZ",
"price" : "10 €"
}
]
}
],
"full_amount" : "40",
}
],
"rate" : "0.23",
"date" : "2015-07-02 13:04:34",
}