Use $size with $sort in array and sub array - mongodb

Here's the structure part of my collection:
_id: ObjectId("W"),
names: [
{
number: 1,
subnames: [ { id: "X", day: 1 }, { id: "Y", day: 10 }, { id: "Z", day: 2 } ],
list: ["A","B","C"],
day: 1
},
{
number: 2,
day: 5
},
{
number: 3,
subnames: [ { id: "X", day: 8 }, { id: "Z", day: 5 } ],
list: ["A","C"],
day: 2
},
...
],
...
I use this request:
db.publication.aggregate( [ { $match: { _id: ObjectId("W") } }, { $group: { _id: "$_id", SizeName: { $first: { $size: { $ifNull: [ "$names", [] ] } } }, names: { $first: "$names" } } }, { $unwind: "$names" }, { $sort: { "names.day": 1 } }, { $group: { _id: "$_id", SzNames: { $sum: 1 }, names: { $push: { number: "$names.number", subnames: "$names.subnames", list: "$names.list", SizeList: { $size: { $ifNull: [ "$names.list", [] ] } } } } } } ] );
but I would now use $sort for my names array AND my subnames array to obtain this result (subnames may not exist) :
_id: ObjectId("W"),
names: [
{
number: 2,
SizeList: 0,
day: 5
},
{
number: 3,
subnames: [ { id: "Z", day: 5 }, { id: "X", day: 8 } ],
list: ["A","C"],
SizeList: 2,
day: 2
},
{
number: 1,
subnames: [ { id: "X", day: 1 }, { id: "Z", day: 2 }, { id: "Y", day: 10 } ],
list: ["A","B","C"],
SizeList: 3,
day: 1
}
...
],
...
Can you help me ?

You can do this, but with great difficulty. I for one would gladly vote for an inline version of $sort along the lines of the $map operator. That would makes things so much easier.
For now though you need to de-construct and re-build the arrays after sorting. And you have to be very careful about this. Hence make false arrays with a single entry before processing $unwind:
db.publication.aggregate([
{ "$project": {
"SizeNames": {
"$size": {
"$ifNull": [ "$names", [] ]
}
},
"names": { "$ifNull": [{ "$map": {
"input": "$names",
"as": "el",
"in": {
"SizeList": {
"$size": {
"$ifNull": [ "$$el.list", [] ]
}
},
"SizeSubnames": {
"$size": {
"$ifNull": [ "$$el.subnames", [] ]
}
},
"number": "$$el.number",
"day": "$$el.day",
"subnames": { "$ifNull": [ "$$el.subnames", [0] ] },
"list": "$$el.list"
}
}}, [0] ] }
}},
{ "$unwind": "$names" },
{ "$unwind": "$names.subnames" },
{ "$sort": { "_id": 1, "names.subnames.day": 1 } },
{ "$group": {
"_id": {
"_id": "$_id",
"SizeNames": "$SizeNames",
"names": {
"SizeList": "$names.SizeList",
"SizeSubnames": "$names.SizeSubnames",
"number": "$names.number",
"list": "$names.list",
"day": "$names.day"
}
},
"subnames": { "$push": "$names.subnames" }
}},
{ "$sort": { "_id._id": 1, "_id.names.day": 1 } },
{ "$group": {
"_id": "$_id._id",
"SizeNames": { "$first": "$_id.SizeNames" },
"names": {
"$push": { "$cond": [
{ "$ne": [ "$_id.names.SizeSubnames", 0 ] },
{
"number": "$_id.names.number",
"subnames": "$subnames",
"list": "$_id.names.list",
"SizeList": "$_id.names.SizeList",
"day": "$_id.names.day"
},
{
"number": "$_id.names.number",
"list": "$_id.names.list",
"SizeList": "$_id.names.SizeList",
"day": "$_id.names.day"
}
]}
}
}},
{ "$project": {
"SizeNames": 1,
"names": {
"$cond": [
{ "$ne": [ "$SizeNames", 0 ] },
"$names",
[]
]
}
}}
])
You can kind of "hide away" the original empty array from the inner document as shown, but it's really difficult to remove all presence of the outer "names" array without pulling a similar conditional array "push" technique, and that really isn't a practical approach.
If all of this is just about sorting array elements in individual documents though, the aggregation framework should not be the tool to do this. It can be done as shown, but per document this is much easier to do in client side code.
Output:
{
"_id" : ObjectId("54b5cff8102f292553ce9bb5"),
"SizeNames" : 3,
"names" : [
{
"number" : 1,
"subnames" : [
{
"id" : "X",
"day" : 1
},
{
"id" : "Z",
"day" : 2
},
{
"id" : "Y",
"day" : 10
}
],
"list" : [
"A",
"B",
"C"
],
"SizeList" : 3,
"day" : 1
},
{
"number" : 3,
"subnames" : [
{
"id" : "Z",
"day" : 5
},
{
"id" : "X",
"day" : 8
}
],
"list" : [
"A",
"C"
],
"SizeList" : 2,
"day" : 2
},
{
"number" : 2,
"SizeList" : 0,
"day" : 5
}
]
}

Related

Grouping into array in MongoDB

I have MongoDB collection with below documents:
[
{
"productType":"Bike",
"company":"yamaha",
"model":"y1"
},
{
"productType":"Bike",
"company":"bajaj",
"model":"b1"
},
{
"productType":"Bike",
"company":"yamaha",
"model":"y1"
},
{
"productType":"Car",
"company":"Maruti",
"model":"m1"
},
{
"productType":"Bike",
"company":"yamaha",
"model":"y2"
},
{
"productType":"Car",
"company":"Suzuki",
"model":"s1"
}
]
I want my output to be like :
{
"productType": [
{
"name": "Bike",
"count": 4,
"companies": [
{
"name": "Yamaha",
"count": 3,
"models": [
{
"name": "y1",
"count": 2
},
{
"name": "y2",
"count": 1
}
]
},
{
"name": "Bajaj",
"count": 1,
"models": [
{
"name": "b1",
"count": 1
}
]
}
]
},
{
"name": "Car",
"count": 2,
"companies": [
{
"name": "Maruti",
"count": 1,
"models": [
{
"name": "m1",
"count": 1
}
]
},
{
"name": "Suzuki",
"count": 1,
"models": [
{
"name": "s1",
"count": 1
}
]
}
]
}
]
}
I am not able to understand how to create arrays inside existing array using $push. I know we can create an array using $push but how to create array of array with it ?
In future, I might want to add "metaData" field also along with name and count.
You have to run multiple $group stages, one for each level:
db.collection.aggregate([
{
$group: {
_id: { company: "$company", productType: "$productType", model: "$model" },
count: { $sum: 1 }
}
},
{
$group: {
_id: { productType: "$_id.productType", company: "$_id.company" },
models: { $push: { name: "$_id.model", count: "$count" } },
count: { $sum: "$count" }
}
},
{
$group: {
_id: "$_id.productType",
companies: { $push: { company: "$_id.company", models: "$models", count: "$count" } },
count: { $sum: "$count" }
}
},
{ $set: { name: "$_id", _id: "$$REMOVE" } },
{
$group: {
_id: null,
productType: { $push: "$$ROOT" }
}
}
])
Mongo Playground
Try this:
db.testCollection.aggregate([
{
$group: {
_id: {
name: "$productType",
company: "$company",
model: "$model"
},
count: { $sum: 1 }
}
},
{
$group: {
_id: {
name: "$_id.name",
company: "$_id.company"
},
count: { $sum: "$count" },
models: {
$push: {
name: "$_id.model",
count: "$count"
}
}
}
},
{
$group: {
_id: { name: "$_id.name" },
count: { $sum: "$count" },
companies: {
$push: {
name: "$_id.company",
count: "$count",
models: "$models"
}
}
}
},
{
$group: {
_id: null,
productType: {
$push: {
name: "$_id.name",
count: "$count",
companies: "$companies"
}
}
}
},
{
$project: { _id: 0 }
}
]);
Output:
{
"productType" : [
{
"name" : "Car",
"count" : 2,
"companies" : [
{
"name" : "Suzuki",
"count" : 1,
"models" : [
{
"name" : "s1",
"count" : 1
}
]
},
{
"name" : "Maruti",
"count" : 1,
"models" : [
{
"name" : "m1",
"count" : 1
}
]
}
]
},
{
"name" : "Bike",
"count" : 4,
"companies" : [
{
"name" : "yamaha",
"count" : 3,
"models" : [
{
"name" : "y2",
"count" : 1
},
{
"name" : "y1",
"count" : 2
}
]
},
{
"name" : "bajaj",
"count" : 1,
"models" : [
{
"name" : "b1",
"count" : 1
}
]
}
]
}
]
}

Fill day gaps of two-dimensional timeseries data in MongoDB with aggregate

I have a collection of two-dimensional timeseries data as follows:
[
{
"value" : 9,
"timestamp" : "2020-12-30T02:06:33.000+0000",
"recipeId" : 15
},
{
"value" : 2,
"timestamp" : "2020-12-30T12:04:23.000+0000",
"recipeId" : 102
},
{
"value" : 5,
"timestamp" : "2020-12-30T15:09:23.000+0000",
"recipeId" : 102
},
...
]
The records have a recipeId which is the first level of grouping I'm looking for. All values for a day of a recipe should be summed up. I want an array of timeseries per recipeId. I need the missing days to be filled with a 0. I want this construct to be created for a provided start and end date range.
Some like this for date range of 2020-12-29 to 2020-12-31:
[
[
{
"sum" : 0,
"timestamp" : "2020-12-29",
"recipeId" : 15
},
{
"sum" : 9,
"timestamp" : "2020-12-30",
"recipeId" : 15
},
{
"sum" : 0,
"timestamp" : "2020-12-31",
"recipeId" : 15
},
...
],
[
{
"sum" : 0,
"timestamp" : "2020-12-29",
"recipeId" : 0
},
{
"sum" : 7,
"timestamp" : "2020-12-30",
"recipeId" : 102
},
{
"sum" : 0,
"timestamp" : "2020-12-31",
"recipeId" : 102
},
...
]
]
This is what I currently have and it's only partially solving my requirements. I can't manage to get the last few stages right:
[
{
"$match": {
"timestamp": {
"$gte": "2020-12-29T00:00:00.000Z",
"$lte": "2020-12-31T00:00:00.000Z"
}
}
},
{
"$addFields": {
"timestamp": {
"$dateFromParts": {
"year": { "$year": "$timestamp" },
"month": { "$month": "$timestamp" },
"day": { "$dayOfMonth": "$timestamp" }
}
},
"dateRange": {
"$map": {
"input": {
"$range": [
0,
{
"$trunc": {
"$divide": [
{
"$subtract": [
"2020-12-31T00:00:00.000Z",
"2020-12-29T00:00:00.000Z"
]
},
1000
]
}
},
86400
]
},
"in": {
"$add": [
"2020-12-29T00:00:00.000Z",
{ "$multiply": ["$$this", 1000] }
]
}
}
}
}
},
{ "$unwind": "$dateRange" },
{
"$group": {
"_id": { "date": "$dateRange", "recipeId": "$recipeId" },
"count": {
"$sum": { "$cond": [{ "$eq": ["$dateRange", "$timestamp"] }, 1, 0] }
}
}
},
{
"$group": {
"_id": "$_id.date",
"total": { "$sum": "$count" },
"byRecipeId": {
"$push": {
"k": { "$toString": "$_id.recipeId" },
"v": { "$sum": "$count" }
}
}
}
},
{ "$sort": { "_id": 1 } },
{
"$project": {
"_id": 0,
"timestamp": "$_id",
"total": "$total",
"byRecipeId": {
"$arrayToObject": {
"$filter": { "input": "$byRecipeId", "cond": "$$this.v" }
}
}
}
}
]
which results in:
[
{
"timestamp": "2020-12-29T00:00:00.000Z",
"total": 21,
"byRecipeId": {}
},
{
"timestamp": "2020-12-30T00:00:00.000Z",
"total": 0,
"byRecipeId": {
"15": 9,
"102": 7
}
},
{
"timestamp": "2020-12-31T00:00:00.000Z",
"total": 0,
"byRecipeId": {}
}
]
I'm open to alternative solution of course. For examples I came across this post: https://medium.com/#alexandro.ramr777/fill-missing-values-using-mongodb-aggregation-framework-f011114e83e0 but it doesn't deal with multi-dimensions.
You could use the $redcue function. This code fills the gabs of Minutes for current day. Should be easy to adapt it to give missing Days.
{
$addFields: {
data: {
$reduce: {
input: { $range: [0, 24 * 60] },
initialValue: [],
in: {
$let: {
vars: {
ts: {
$add: [
moment().startOf('day').toDate(),
{ $multiply: ["$$this", 1000 * 60] }
]
}
},
in: {
$concatArrays: [
"$$value",
[{
$cond: {
if: { $in: ["$$ts", "$data.timestamp"] },
then: {
$first: {
$filter: {
input: "$data",
cond: { $eq: ["$$this.timestamp", "$$ts"] }
}
}
},
else: { timestamp: "$$ts", total: 0 }
}
}]
]
}
}
}
}
}
}
}
In my opinion, $reduce is more elegant than $map, however based on my experience the performance is much worse with $reduce.

Compare integers stored as Strings in Mongodb

In the below collection, column "qty" holds the integer values but the datatype is string.
I want to compare the "qty" field with an integer in the aggregate and "warehouse" field with a string "A". ("qty" > 2 and "warehouse" = "A")
[Can't change the datatype in the collection to integer as huge dependency is present]
Edit : Need to retrieve all the columns and all the documents matching the criteria.
Query : getting improper results
db.runCommand(
{
aggregate: "products", pipeline: [
{
$match: {
instock: {
$elemMatch: {
warehouse: "A",
qty: { $gt: "2" }
}
}
}
},
{ $project: { _id: 0 } }],
cursor: { batchSize: 200 }
});
Result : not getting documents where item = journal though it satisfies the conditions
/* 1 */
{
"item" : "paper",
"instock" : [
{
"warehouse" : "A",
"qty" : "60"
},
{
"warehouse" : "B",
"qty" : "15"
}
]
},
/* 2 */
{
"item" : "planner",
"instock" : [
{
"warehouse" : "A",
"qty" : "22"
},
{
"warehouse" : "B",
"qty" : "5"
}
]
}
Products Collection
[
{
"item": "journal",
"instock": [
{
"warehouse": "A",
"qty": "11"
},
{
"warehouse": "C",
"qty": "15"
}
]
},
{
"item": "paper",
"instock": [
{
"warehouse": "A",
"qty": "60"
},
{
"warehouse": "B",
"qty": "15"
}
]
},
{
"item": "planner",
"instock": [
{
"warehouse": "A",
"qty": "22"
},
{
"warehouse": "B",
"qty": "5"
}
]
}
]
Getting improper results as greater than operator in this case is working lexicographically but it should work like integers. Though I tried converting that to double but I am getting no results.
Query with $convert to double : no result
db.runCommand(
{
aggregate: "products", pipeline: [
//{ $match: { "item": { $in: ["planner", "paper","journal"] } } },
{
$match: {
instock: {
$elemMatch: {
warehouse: "A",
qty: {
$gt: [
{$convert:{ input: "$qty", to: "double" }}, 5]
}
}
}
}
},
{ $project: { _id: 0 } }],
cursor: { batchSize: 200 }
});
Try this:
db.products.aggregate([
{
$unwind: "$instock"
},
{
$match: {
$expr: {
$and: [
{
$eq: [
"$instock.warehouse",
"A"
]
},
{
$gt: [
{
$toInt: "$instock.qty"
},
2
]
}
]
}
}
},
{
$group: {
_id: "$_id",
item: {
$first: "$item"
},
instock: {
$push: "$instock"
}
}
},
{
$project: {
_id: 0
}
}
])
MongoPlayground
Try this, it uses $filter to retain objects has criteria :
db.runCommand(
{
aggregate: "products", pipeline: [
{ $match: { 'instock.warehouse': 'A' } },
{
$addFields: {
instockCheck: {
$filter: {
input: '$instock', as: 'each', cond: {
$and: [{ $gt: [{ $toInt: '$$each.qty' }, 2] },
{ $eq: ['$$each.warehouse', 'A'] }]
}
}
}
}
}, { $match: { instockCheck: { $gt: [] } } }, { $project: { instockCheck: 0, _id: 0 } }],
cursor: { batchSize: 200 }
});
Test : MongoDB-Playground

MongoDB avoid duplicates using $addToSet in aggregation pipeline

there is aggregation pipeline:
db.getCollection('yourCollection').aggregate(
{
$unwind: {
path: "$dates",
includeArrayIndex: "idx"
}
},
{
$project: {
_id: 0,
dates: 1,
numbers: { $arrayElemAt: ["$numbers", "$idx"] },
goals: { $arrayElemAt: ["$goals", "$idx"] },
durations: { $arrayElemAt: ["$durations", "$idx"] }
}
}
)
which perform on the following data (sample documents):
{
"_id" : ObjectId("52d017d4b60fb046cdaf4851"),
"dates" : [
1399518702000,
1399126333000,
1399209192000,
1399027545000
],
"dress_number" : "4",
"name" : "J. Evans",
"numbers" : [
"5982",
"5983",
"5984",
"5985"
],
"goals": [
"1",
"0",
"4",
"2"
],
"durations": [
"78",
"45",
"90",
"90"
]
}
{
"_id" : ObjectId("57e250c1b60fb0213d06737c"),
"dates" : [
"1399027545000",
"1399101432000",
"1399026850000",
"1399904504000"
],
"dress_number" : "6",
"name" : K. Mitnick,
"numbers" : [
"0982",
"0981",
"0958",
"0982"
],
"durations" : [
98,
110,
66,
92
],
"goals" : [
"2",
"3",
"0",
"1"
]
}
The query works good, but there are duplicate records so I'm trying to use $addToSet operator to avoid duplicates:
db.getCollection('yourCollection').aggregate(
{
$match: {
"number": number
}
},
{
$unwind: {
path: "$dates",
includeArrayIndex: "idx"
}
},
$group: {
_id: '$_id',
dates: { $addToSet: '$dates' }
},
{
$project: {
_id: 0,
dates: 1,
numbers: { $arrayElemAt: ["$numbers", "$idx"] },
goals: { $arrayElemAt: ["$goals", "$idx"] },
durations: { $arrayElemAt: ["$durations", "$idx"] }
}
}
)
but I got only dates (other field are null)
{ dates:
[ '1399026850000',
'1399101432000',
'1399027545000',
'1399904504000',
'1399024474000',
'1399126333000' ],
numbers: null,
goals: null,
durations: null },
{ dates:
[ '1399027545000',
'1399024474000',
'1399518702000',
'1399126333000',
'1399209192000',
'1399356651000' ],
numbers: null,
goals: null,
conversation_durations: null },
{ dates:
[ '1399026850000',
'1399101432000',
'1399027545000',
'1399904504000',
'1399024474000' ],
numbers: null,
goals: null,
durations: null }
Does anybody know where is the problem?
You need to include the fields within the $group pipeline using the $first operator as follows:
db.getCollection('yourCollection').aggregate([
{ "$unwind": "$dates" },
{
"$group": {
"_id": "$_id",
"dates": { "$addToSet": "$dates" },
"numbers": { "$first": "$numbers" },
"goals": { "$first": "$goals" },
"durations": { "$first": "$durations" }
}
},
{ "$unwind": {
"path": "$dates",
"includeArrayIndex": "idx"
} },
{
"$project": {
"_id": 0,
"dates": 1,
"numbers": { "$arrayElemAt": ["$numbers", "$idx"] },
"goals": { "$arrayElemAt": ["$goals", "$idx"] },
"durations": { "$arrayElemAt": ["$durations", "$idx"] }
}
}
])
or using $setUnion to eliminate duplicates as:
db.getCollection('yourCollection').aggregate([
{
"$project": {
"_id": 0,
"dates": { "$setUnion": ["$dates", "$dates"] },
"numbers": 1,
"goals": 1,
"durations": 1
}
}
{ "$unwind": {
"path": "$dates",
"includeArrayIndex": "idx"
} },
{
"$project": {
"_id": 0,
"dates": 1,
"dateIndex": "$idx",
"numbers": { "$arrayElemAt": ["$numbers", "$idx"] },
"goals": { "$arrayElemAt": ["$goals", "$idx"] },
"durations": { "$arrayElemAt": ["$durations", "$idx"] }
}
}
])

Sum of two fields based on some conditions

These are my documents in Stat collection:
{placeid: 'plaza', guestid: "xxx", logtype: "purchase", value: 12}
{placeid: 'plaza', guestid: "xxx", logtype: "visit", value: 0}
{placeid: 'plaza', guestid: "xxx", logtype: "purchase", value: 17}
{placeid: 'plaza', guestid: "yyy", logtype: "visit", value: 0}
I want to aggregate these documents to get information (passed plaza as argument):
xxx visited: 1, purchases: 2, value of purchases is 29
yyy visited: 1, purchases: 0, value of purchases is 0
This is my approach:
Stat.aggregate(
[
{ $match: { placeid: "plaza" } },
{
$group: {
_id: "$guestid",
totallogs: { $sum: 1 },
totalvalue: { $sum: "$value" },
}
}
]
)
problem here is that this aggregation does not take logtype into consideration.
And I do not know how to improve it. Any help?
You need to use the aggregation framework to $group your documents by "guestid" and use the $sum accumulator operator to return the sum. Of course you also need the $cond operator evaluates the value of "logtype" and returns the value of the "true case".
db.Stat.aggregate([
{ "$match": { "placeid": "plaza" } },
{ "$group": {
"_id": "$guestid",
"visit": {
"$sum": {
"$cond": [
{ "$eq": [ "$logtype", "visit" ] },
1,
0
]
}
},
"purchases": {
"$sum": {
"$cond": [
{ "$eq": [ "$logtype", "purchase" ] },
1,
0
]
}
},
"value_purchase": {
"$sum": {
"$cond": [
{ "$eq": [ "$logtype", "purchase" ] },
"$value",
0
]
}
}
}}
])
which produces:
{ "_id" : "yyy", "visit" : 1, "purchases" : 0, "value_purchase" : 0 }
{ "_id" : "xxx", "visit" : 1, "purchases" : 2, "value_purchase" : 29 }