I have documents with nested arrays and want to sort the collection by customerId, then by productList.productId, and then by productList.itemList.id, all in ascending order. The MongoDB version is 3.0.14.
So far I have tried the following query, but it doesn't sort the collection as expected:
db.file_old.find({}, {
customerId: 1,
"productList.productId": 1,
"productList.itemList.id": 1
})
.sort({
customerId: 1,
productList: 1,
"productList.itemList": 1
})
I also tried the aggregation framework, like this:
db.file_old.aggregate([
{"$unwind": "$productList"} ,
{"$sort": {"customerId": 1, "productList.productId": 1}}
])
It works fine for two fields, but it doesn't work when I add "productList.itemList.id", like this:
db.file_old.aggregate([
{"$unwind": "$productList"} ,
{"$sort": {"customerId": 1, "productList.productId": 1, "productList.itemList.id": 1}}
])
Collection structure:
{
"_id" : ObjectId("5f33cc2a1e84082968132324"),
"customerId" : 2196,
"productList" : [
{
"productId" : 7531,
"itemList" : [
{
"id" : 144
},
{
"id" : 145
}
]
},
{
"productId" : 7534,
"itemList" : [
{
"id" : 1244
},
{
"id" : 1243
},
{
"id" : 1245
},
{
"id" : 1242
}
]
}
]
},{
"_id" : ObjectId("5f33cc2a1e84082968132326"),
"customerId" : 2201,
"productList" : [
{
"productId" : 101201,
"itemList" : [
{
"id" : 863
},
{
"id" : 865
},
{
"id" : 862
}
]
},
{
"productId" : 7537,
"itemList" : [
{
"id" : 982
},
{
"id" : 1002
},
{
"id" : 896
}
]
}
]
}
You cannot sort nested arrays directly; you first need to unwind (deconstruct) them, and only then can the sort be applied. Let's go step by step:
productList
deconstruct array ($unwind)
itemList
deconstruct array ($unwind)
sort by id ($sort)
re-construct array ($group)
sort by productId ($sort)
re-construct productList ($group)
sort by customerId ($sort)
$unwind deconstruct productList array
// Sorts every level of the nested document in ascending order: itemList by
// id, productList by productId, and the documents themselves by customerId.
// The arrays are deconstructed, sorted, and then re-constructed level by level.
db.collection.aggregate([
  { $unwind: "$productList" },
  // $unwind deconstruct productList.itemList array
  { $unwind: "$productList.itemList" },
  // $sort by productList.itemList.id ascending order
  { $sort: { "productList.itemList.id": 1 } },
  // $group by all 3 main level of ids and re-construct itemList array
  {
    $group: {
      _id: {
        _id: "$_id",
        customerId: "$customerId",
        productId: "$productList.productId"
      },
      itemList: { $push: "$productList.itemList" }
    }
  },
  // $sort by productId ascending order
  { $sort: { "_id.productId": 1 } },
  // $group by main 2 level of ids and re-construct productList array
  {
    $group: {
      _id: {
        _id: "$_id._id",
        customerId: "$_id.customerId"
      },
      productList: {
        $push: {
          productId: "$_id.productId",
          itemList: "$itemList"
        }
      }
    }
  },
  // $project to show required fields (lifts _id and customerId back out of
  // the compound grouping key)
  {
    $project: {
      _id: "$_id._id",
      customerId: "$_id.customerId",
      productList: 1
    }
  },
  // $sort by customerId id
  { $sort: { customerId: 1 } }
])
Playground
Related
//8. isbn numbers of books that sold at least X copies (you decide the value for X).
Book example
{
isbn: "0001",
title: "Book1",
pages: NumberInt("150"),
price: NumberDecimal("321.2"),
copies: NumberInt("3"),
language: "english",
author: ["Author1"],
category: ["Space Opera"],
genre: ["Genre-1", "Genre-2"],
character: ["Character-1", "Character-2"],
},
Order example
{
orderNo: "3",
customerNo: "0003",
date: {
day: NumberInt("25"),
month: NumberInt("02"),
year: NumberInt("2021"),
},
orderLine: [
{
isbn: "0006",
price: NumberDecimal("341.0"),
amount: NumberInt("2"),
},
{
isbn: "0007",
price: NumberDecimal("170.5"),
amount: NumberInt("1"),
},
],
},
My try
I believe I have a mistake inside the pipeline at the group stage. For now I need at least to have isbn along with the copies sold in one object.
db.books.aggregate([ // editing this
{ $match : {} },
{
$lookup :
{
from : "orders",
pipeline : [
{
$group :
{
_id: null,
amount_total : { $sum : "$orderLine.amount" }
}
},
{ $project : { _id : 0, amount_total : 1} }
],
as : "amount"
}
},
{ $project : { _id : 0, isbn : 1, amount : 1} }
])
No idea why all are 0's, I was expecting at least some different numbers.
{
"isbn": "0001",
"amount": [
{
"amount_total": 0
}
]
},
{
"isbn": "0002",
"amount": [
{
"amount_total": 0
}
]
},
{
"isbn": "0003",
"amount": [
{
"amount_total": 0
}
]
},// and so on
Apparently, this does what I wanted.
db.books.aggregate([
{
$lookup: {
from: "orders",
let: { isbn: "$isbn" }, // Pass this variable to pipeline for Joining condition.
pipeline: [
{ $unwind: "$orderLine" },
{
$match: {
// Join condition.
$expr: { $eq: ["$orderLine.isbn", "$$isbn"] }
}
},
{
$project: { _id: 0 , orderNo : 1, "orderLine.amount": 1}
}
],
as: "amount"
}
}, { $project : { _id : 0, isbn : 1, amount_total : { $sum : "$amount.orderLine.amount" } } }
])
In your query, $lookup performs the join without any condition. Try this query instead:
// For each book, join the order lines that reference its isbn and total the
// amounts of those lines.
db.books.aggregate([
  {
    $lookup: {
      from: "orders",
      // Expose the book's isbn to the sub-pipeline as $$isbn.
      let: { isbn: "$isbn" },
      pipeline: [
        // One document per order line, so each line is matched on its own.
        { $unwind: "$orderLine" },
        // Join condition: keep only the lines for this book.
        { $match: { $expr: { $eq: ["$orderLine.isbn", "$$isbn"] } } }
      ],
      as: "amount"
    }
  },
  // Sum the amount of every joined order line; _id is suppressed.
  { $project: { _id: 0, isbn: 1, amount_total: { $sum: "$amount.orderLine.amount" } } }
]);
Test data:
books collection:
/* 1 createdAt:3/12/2021, 10:41:13 AM*/
{
"_id" : ObjectId("604af7f14b5860176c2254b7"),
"isbn" : "0001",
"title" : "Book1"
},
/* 2 createdAt:3/12/2021, 10:41:13 AM*/
{
"_id" : ObjectId("604af7f14b5860176c2254b8"),
"isbn" : "0002",
"title" : "Book2"
}
orders collection:
/* 1 createdAt:3/12/2021, 11:10:51 AM*/
{
"_id" : ObjectId("604afee34b5860176c2254ce"),
"orderNo" : "1",
"customerNo" : "0001",
"orderLine" : [
{
"isbn" : "0001",
"price" : 341,
"amount" : 2
},
{
"isbn" : "0002",
"price" : 170.5,
"amount" : 1
},
{
"isbn" : "0003",
"price" : 190.5,
"amount" : 3
}
]
},
/* 2 createdAt:3/12/2021, 11:10:51 AM*/
{
"_id" : ObjectId("604afee34b5860176c2254cf"),
"orderNo" : "3",
"customerNo" : "0003",
"orderLine" : [
{
"isbn" : "0001",
"price" : 341,
"amount" : 2
},
{
"isbn" : "0002",
"price" : 170.5,
"amount" : 1
},
{
"isbn" : "0003",
"price" : 190.5,
"amount" : 3
}
]
}
Output:
/* 1 */
{
"isbn" : "0001",
"amount_total" : 4
},
/* 2 */
{
"isbn" : "0002",
"amount_total" : 2
}
$sum inside a $group stage sums root-level and grouped fields, but here orderLine is an array, so "$orderLine.amount" resolves to an array of numbers rather than a number. You need to sum that array first and then accumulate the result, which means nesting one $sum inside another:
{
$group: {
_id: null,
amount_total: {
$sum: {
$sum: "$orderLine.amount"
}
}
}
}
Playground
Try the final solution,
$match orders whose orderLine.isbn array contains the book's isbn, using an $in condition
$filter to iterate over the orderLine array and match on isbn; it returns the filtered elements
$let declares an orders variable to hold the filtered orderLine elements above, and sums the amount of the filtered array using $sum
$project to show the required fields and get the total of the amount_total array
// For each book, total the amount of its order lines without unwinding:
// matching orders are filtered down to the lines for this isbn and summed.
db.books.aggregate([
  {
    $lookup: {
      from: "orders",
      let: { isbn: "$isbn" },
      pipeline: [
        // Keep only orders whose orderLine.isbn values include this book's isbn.
        { $match: { $expr: { $in: ["$$isbn", "$orderLine.isbn"] } } },
        {
          $project: {
            _id: 0,
            amount_total: {
              $let: {
                // orders = the elements of orderLine whose isbn equals $$isbn.
                vars: {
                  orders: {
                    $filter: { input: "$orderLine", cond: { $eq: ["$$this.isbn", "$$isbn"] } }
                  }
                },
                // Per-order total for this book.
                in: { $sum: "$$orders.amount" }
              }
            }
          }
        }
      ],
      as: "amount"
    }
  },
  // Add up the per-order totals collected in the joined "amount" array.
  { $project: { _id: 0, isbn: 1, amount_total: { $sum: "$amount.amount_total" } } }
])
Playground
I have a dataset in mongodb collection named visitorsSession like
{ip : 192.2.1.1,country : 'US', type : 'Visitors',date : '2019-12-15T00:00:00.359Z'},
{ip : 192.3.1.8,country : 'UK', type : 'Visitors',date : '2019-12-15T00:00:00.359Z'},
{ip : 192.5.1.4,country : 'UK', type : 'Visitors',date : '2019-12-15T00:00:00.359Z'},
{ip : 192.8.1.7,country : 'US', type : 'Visitors',date : '2019-12-15T00:00:00.359Z'},
{ip : 192.1.1.3,country : 'US', type : 'Visitors',date : '2019-12-15T00:00:00.359Z'}
I am using this mongodb aggregation
[{$match: {
nsp : "/hrm.sbtjapan.com",
creationDate : {
$gte: "2019-12-15T00:00:00.359Z",
$lte: "2019-12-20T23:00:00.359Z"
},
type : "Visitors"
}}, {$group: {
_id : "$country",
totalSessions : {
$sum: 1
}
}}, {$project: {
_id : 0,
country : "$_id",
totalSessions : 1
}}, {$sort: {
country: -1
}}]
Using the above aggregation I am getting results like this:
[{country : 'US',totalSessions : 3},{country : 'UK',totalSessions : 2}]
But I also want the total number of visitors along with the result, like totalVisitors : 5
How can I do this in a MongoDB aggregation?
You can use $facet aggregation stage to calculate total visitors as well as visitors by country in a single pass:
// Produces the overall visitor count and the per-country session counts from
// the same set of matched documents.
db.visitorsSession.aggregate([
  {
    $match: {
      nsp: "/hrm.sbtjapan.com",
      creationDate: { $gte: "2019-12-15T00:00:00.359Z", $lte: "2019-12-20T23:00:00.359Z" },
      type: "Visitors"
    }
  },
  {
    $facet: {
      // Sub-pipeline 1: number of matched documents, as [{ count: <n> }].
      totalVisitors: [{ $count: "count" }],
      // Sub-pipeline 2: one { country, sessions } entry per country.
      countrySessions: [
        { $group: { _id: "$country", sessions: { $sum: 1 } } },
        { $project: { country: "$_id", _id: 0, sessions: 1 } }
      ]
    }
  },
  // Replace the one-element totalVisitors array with its count value.
  { $addFields: { totalVisitors: { $arrayElemAt: ["$totalVisitors.count", 0] } } }
])
The output:
{
"totalVisitors" : 5,
"countrySessions" : [
{
"sessions" : 2,
"country" : "UK"
},
{
"sessions" : 3,
"country" : "US"
}
]
}
You could be better off with two queries to do this.
To save the two DB round trips, the following aggregation can be used, although IMO it is rather verbose (and might be somewhat expensive if the documents are very large) just to count the documents.
Idea: have a $group at the top to count the documents and preserve the originals using $push and $$ROOT, and then $unwind the created array of original docs before the other match/filter operations.
// Counts all documents up front, carries that count along with every original
// document, and then groups the originals by country.
db.collection.aggregate([
  // Single group over the whole input: docsCount is the number of documents,
  // originals keeps each source document via $$ROOT.
  { $group: { _id: null, docsCount: { $sum: 1 }, originals: { $push: "$$ROOT" } } },
  // Back to one document per source document, each still carrying docsCount.
  { $unwind: "$originals" },
  { $match: "..." }, //and other stages on `originals` which contains the source documents
  {
    $group: {
      _id: "$originals.country",
      totalSessions: { $sum: 1 },
      // docsCount is the same on every document, so the first one suffices.
      totalVisitors: { $first: "$docsCount" }
    }
  }
]);
Sample O/P: Playground Link
[
{
"_id": "UK",
"totalSessions": 2,
"totalVisitors": 5
},
{
"_id": "US",
"totalSessions": 3,
"totalVisitors": 5
}
]
I need to count the size of an array object and I also need to get the averages for each field in the array labeled raised_amount. However, MongoDB will not let me count the array size after unwinding it(duh). Mongo will not let me count the array size before unwinding either. This is for a class I am taking. Quite the challenge.
db.research.aggregate({$unwind:"$funding_rounds"},
{"$group": {
"_id":{"name": "$name"},
"averageFunding" : {
"$avg" : "$funding_rounds.raised_amount"
}
}
},
{$project: { count: { $size:"$funding_rounds" }}},
{ $sort: { averageFunding: -1 } },
{"$limit":10})
Take out {$project: { count: { $size:"$funding_rounds" }}} and it works! However, I wouldn't have funding_round count. Try to count the rounds by themselves, and it works.
Example of data:
{
"name": "Facebook",
"total_money_raised": "$39.8M",
"funding_round": [
{
"example": 123,
"round_code": "a",
"raised_amount": "1232"
},
{
"example": 123,
"round_code": "bat",
"raised_amount": "1232"
},
{
"example": 123,
"round_code": "cat",
"raised_amount": "1232"
}
]
}
Any ideas on how to count the array size in this aggregation?
$size expects an array, but you $unwind the array into individual objects before counting. That is why MongoDB refuses to compute the size.
Try the code below:
// Records the array size before unwinding, then averages raised_amount per
// name and keeps the ten highest averages.
db.getCollection('tests').aggregate([
  // Capture the length of funding_round while it is still an array.
  {
    $project: {
      _id: 1,
      name: 1,
      total_money_raised: 1,
      funding_round: 1,
      size: { $size: "$funding_round" }
    }
  },
  { $unwind: "$funding_round" },
  {
    $group: {
      _id: "$name",
      avgFunding: { $avg: "$funding_round.raised_amount" },
      // size was computed before the $unwind and is repeated on every unwound document.
      size: { $first: "$size" },
      // Number of unwound documents in the group.
      totalCount: { $sum: 1 }
    }
  },
  { $sort: { avgFunding: -1 } },
  { $limit: 10 }
])
Output:
/* 1 */
{
"_id" : "Facebook",
"avgFunding" : 1232.0,
"size" : 3,
"totalCount" : 3.0
}
If NAME field is unique:
Another thing I should mention: if your name field is unique and you just want the size of the array, you can $unwind and then count the documents in the $group stage, as below:
// Averages raised_amount per name and counts the unwound funding_round
// entries in the same $group, then keeps the ten highest averages.
db.getCollection('tests').aggregate([
  { $unwind: "$funding_round" },
  {
    $group: {
      _id: "$name",
      avgFunding: { $avg: "$funding_round.raised_amount" },
      // One unwound document per array element, so this counts the elements per name.
      size: { $sum: 1 }
    }
  },
  { $sort: { avgFunding: -1 } },
  { $limit: 10 }
])
Output:
/* 1 */
{
"_id" : "Facebook",
"avgFunding" : 1232.0,
"size" : 3.0
}
Where size is the total count of documents that are unwound from an array.
I have the following time series data stored in mongodb
{
"_id" : ObjectId("59a46062e1aeb958a712490e"),
"channelName" : "ABC",
"rtData" : [
{
"ts" : ISODate("2017-08-28T18:26:42.837Z"),
"data" : [ 676.297664, 676.297664 ]
},
{
"ts" : ISODate("2017-08-28T18:27:42.837Z"),
"data" : [ 724.297664, 676.297664 ]
},
{
"ts" : ISODate("2017-08-28T18:29:42.837Z"),
"data" : [ 878.297, 676.297 ]
}
]
}
I want to group the data based on the ts field on hour and get the first element of rtData for that hour.
Here is what I have tried
db.channels.aggregate( [ {$match: {"channelName": "ABC"} }, { $unwind : "$rtData" }, { $group : {_id: { $hour: "$rtData.ts" }, ucast: { $sum: $rtData.data[0]} }
But the above code gives me the following output
{ "_id" : 28, "ucast" : 0 }
What I actually want is
{ "_id" : 28, "ucast" : 676.297664 }
You don't notate getting a first element of an array in an aggregation pipeline like that. You want $arrayElemAt which returns the array value by index:
// Per hour of rtData.ts, sum the first element of each entry's data array.
db.channels.aggregate([
  { $match: { channelName: "ABC" } },
  { $unwind: "$rtData" },
  {
    $group: {
      _id: { $hour: "$rtData.ts" },
      // $arrayElemAt with index 0 picks the first value of rtData.data.
      ucast: { $sum: { $arrayElemAt: ["$rtData.data", 0] } }
    }
  }
])
If your MongoDB does not support $arrayElemAt ( prior to 3.2 ), then you can instead use $first in an additional $group on just the document key, done before you "accumulate" for the desired grouping key:
// Fallback for MongoDB versions without $arrayElemAt: per hour of rtData.ts,
// sum the first element of each entry's data array.
db.channels.aggregate([
  { $match: { channelName: "ABC" } },
  { $unwind: "$rtData" },
  // Unwind the inner array as well, so that $first below yields a single
  // number. Without this stage "$rtData.data" is still an array, and the
  // final $sum accumulator would skip it as a non-numeric value.
  { $unwind: "$rtData.data" },
  // One group per source document and timestamp; $first keeps the first
  // unwound data value of that entry.
  // NOTE(review): assumes ts is unique within a document's rtData — confirm.
  {
    $group: {
      _id: { _id: "$_id", ts: "$rtData.ts" },
      data: { $first: "$rtData.data" }
    }
  },
  // Accumulate those first values per hour.
  {
    $group: {
      _id: { $hour: "$_id.ts" },
      ucast: { $sum: "$data" }
    }
  }
])
In modern versions you can "double barrel" the $sum to both add up array elements as well as act as an accumulator if you wanted to "sum" all elements of the array:
// Per hour of rtData.ts, sum every element of every entry's data array.
db.channels.aggregate([
  { $match: { channelName: "ABC" } },
  { $unwind: "$rtData" },
  {
    $group: {
      _id: { $hour: "$rtData.ts" },
      // Inner $sum adds up one data array; outer $sum accumulates across the group.
      ucast: { $sum: { $sum: "$rtData.data" } }
    }
  }
])
And with older versions ( prior to 3.2 ) you would "double" $unwind for each array path instead:
// Per hour of rtData.ts, sum every element of every entry's data array by
// unwinding both array levels first.
db.channels.aggregate([
  { $match: { channelName: "ABC" } },
  // First level: one document per rtData entry.
  { $unwind: "$rtData" },
  // Second level: one document per value inside rtData.data.
  { $unwind: "$rtData.data" },
  {
    $group: {
      _id: { $hour: "$rtData.ts" },
      ucast: { $sum: "$rtData.data" }
    }
  }
])
You need to use the $first operator for that instead of $sum:
// Per hour of rtData.ts, keep the data array of the first unwound entry.
db.channels.aggregate([
  { $match: { channelName: "ABC" } },
  { $unwind: "$rtData" },
  {
    $group: {
      _id: { $hour: "$rtData.ts" },
      // The field path must be a quoted string; a bare $rtData.data is parsed
      // as a JavaScript identifier and fails before the query is sent.
      ucast: { $first: "$rtData.data" }
    }
  }
])
which will give you output like { "_id" : 28, "ucast" : [ 676.297664, 676.297664 ] }
If you want output like { "_id" : 28, "ucast" : 676.297664 }, use $arrayElemAt in a following $project or $addFields stage.
I have a bunch of documents in mongo with the following structure:
{
"_id" : "",
"number" : 2,
"colour" : {
"_id" : "",
"name" : "Green",
"hex" : "00ff00"
},
"position" : {
"_id" : "",
"name" : "Defence",
"type" : "position"
},
"ageGroup" : {
"_id" : "",
"name" : "Minor Peewee",
"type" : "age"
},
"companyId" : ""
}
I'm currently using Mongo's aggregate to group the documents by ageGroup.name which returns:
//Query
Jerseys.aggregate([
{$match: { companyId: { $in: companyId } } },
{$group: {_id: "$ageGroup.name", jerseys: { $push: "$$ROOT" }} }
]);
//returns
{
_id: "Minor Peewee",
jerseys: array[]
}
but I'd like it to also group by position.name within the age groups. ie:
{
_id: "Minor Peewee",
positions: array[]
}
//in positions array...
{
_id: "Defence",
jerseys: array[]
}
// or ageGroups->positions->jerseys if that makes more sense.
I've tried multiple groups, but I don't think I'm setting them up correctly; I always seem to get an array of _ids. I'm using Meteor as the server and I'm doing this within a Meteor method.
You can use a composite aggregate _id in the first grouping stage.
Then, you can use one of those keys as the "main" _id of the final aggregate and $push the other into another array.
// Two-level grouping: jerseys are first bucketed by (position, ageGroup), and
// those buckets are then collected per age group.
Jerseys.aggregate([
  { $match: { companyId: { $in: companyId } } },
  // each position and age group have an array of jerseys
  {
    $group: {
      _id: { position: "$position", ageGroup: "$ageGroup" },
      jerseys: { $push: "$$ROOT" }
    }
  },
  // for each age group, create an array of positions
  {
    $group: {
      _id: { ageGroup: "$_id.ageGroup" },
      positions: { $push: { position: "$_id.position", jerseys: "$jerseys" } }
    }
  }
]);