I'm not very experienced with mongodb, so maybe someone can help me.
I have a collection kind of like this:
{ user: 1, type: 0 , ... },
{ user: 1, type: 1, ... },
{ user: 1, type: 1, ... },
...
Now, for a specific user, I want the documents grouped by type, then sorted with an offset and limit applied, i.e. something like:
[ { $match: { user: 1 } },
  { $facet: {
      type0: [
        { $match: { type: 0 } },
        { $sort: ... },
        { $skip: offset0 },
        { $limit: limit0 }
      ],
      type1: [
        { $match: { type: 1 } },
        { $sort: ... },
        { $skip: offset1 },
        { $limit: limit1 }
      ]
  } }
]
This works, but: is there a way to also get the total count of every type? $facet within $facet is not allowed, but maybe there is some completely different way? Or will I have to make an aggregate call for every single type in the end?
Something like this:
db.collection.aggregate([
{
$match: {
user: 1
}
},
{
$facet: {
type0: [
{
$match: {
type: 0
}
},
{
$sort: {
type: -1
}
},
{
$skip: 1
},
{
$limit: 2
}
],
type1: [
{
$match: {
type: 1
}
},
{
$sort: {
type: -1
}
},
{
$skip: 1
},
{
$limit: 2
}
],
counts: [
{
$group: {
"_id": "$type",
cnt: {
$sum: 1
}
}
},
{
$project: {
type: "$_id",
cnt: 1,
_id: 0
}
}
]
}
}
])
Explained:
You add one more stage named "counts" inside the $facet to group the total count of the two different types.
playground
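With the sample documents above, the whole pipeline returns a single document holding the three facet arrays; the counts part looks roughly like this (the numbers here are illustrative):
{
  type0: [ /* paged type-0 documents */ ],
  type1: [ /* paged type-1 documents */ ],
  counts: [
    { cnt: 2, type: 0 },
    { cnt: 3, type: 1 }
  ]
}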
Related
I am running the following aggregation pipeline:
const agg = [
{
'$match': {
'aaa': 'bbb'
}
}, {
'$group': {
'_id': '',
'total': {
'$sum': '$num'
}
}
}
];
My problem is, when $match matches nothing, the pipeline returns 0 documents. How do I get the pipeline to always return 1 document?
In MongoDB version 6.0 you can do it like this:
db.collection.aggregate([
{ $match: { aaa: 'bbb' } },
{
$group: {
_id: null,
total: { $sum: "$num" }
}
},
{
$densify: {
field: "total",
range: { step: 1, bounds: [0, 0] }
}
},
{ $set: { _id: { $cond: [{ $eq: [{ $type: "$_id" }, "missing"] }, MaxKey, "$_id"] } } },
{ $sort: { _id: 1 } },
{ $limit: 1 }
])
In versions before 6.0 you can try this one:
db.collection.aggregate([
{
$facet: {
data: [
{ $match: { aaa: 'bbb' } },
{ $group: { _id: null, total: { $sum: "$num" } } }
],
default: [
{ $limit: 1 },
{ $group: { _id: null, total: { $sum: 0 } } },
{ $set: { _id: MaxKey } }
]
}
},
{ $replaceWith: { $mergeObjects: [{ $first: "$default" }, { $first: "$data" }] } },
])
Or this one:
db.collection.aggregate([
{ $match: { aaa: 'bbb' } },
{ $group: { _id: null, total: { $sum: "$num" } } },
{
$unionWith: {
coll: "collection",
pipeline: [
{ $limit: 1 },
{ $set: { _id: MaxKey, total: 0 } },
{ $project: { _id: 1, total: 1 } }
]
}
},
{ $sort: { _id: 1 } },
{ $limit: 1 }
])
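In each case the end result is a single document (for the two pre-6.0 variants this assumes the collection itself is not empty, since the fallback branch seeds itself with $limit: 1), roughly:
// when $match finds documents
[ { _id: null, total: <sum of num> } ]
// when $match matches nothing
[ { _id: MaxKey, total: 0 } ]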
I would like to query the following collection to obtain all item documents whose last sale (ordered by soldDate) has a status of 2.
db.items.insertMany([
{ item: 1,
sales: [
{ soldDate: ISODate("2021-10-04"), status: 1 },
{ soldDate: ISODate("2021-10-05"), status: 2 }
]
},
{ item: 2,
sales: [
{ soldDate: ISODate("2021-09-29"), status: 3 },
{ soldDate: ISODate("2021-09-24"), status: 1 }
]
},
{ item: 3,
sales: [
{ soldDate: ISODate("2021-06-01"), status: 3 },
{ soldDate: ISODate("2021-06-12"), status: 2 },
{ soldDate: ISODate("2021-06-07"), status: 1 }
]
}
]);
So in this example, the query would return the following two documents:
{ item: 1,
sales: [
{ soldDate: ISODate("2021-10-04"), status: 1 },
{ soldDate: ISODate("2021-10-05"), status: 2 } // triggered by this
]
},
{ item: 3,
sales: [
{ soldDate: ISODate("2021-06-01"), status: 3 },
{ soldDate: ISODate("2021-06-12"), status: 2 }, // triggered by this
{ soldDate: ISODate("2021-06-07"), status: 1 }
]
}
Thanks for any help.
You stated "ordered by soldDate", which can actually mean two things: perhaps you want the documents sorted by the array, or perhaps you mean the array itself is sorted. I assumed the latter.
Solution (Array sorted)
db.items.aggregate([
{ $match: { "sales.status": 2} },
{ $unwind: "$sales" },
{ $sort: { "item": 1, "sales.soldDate": 1} },
{ $group: { "_id": "$_id", "item": { $first: "$item" }, "sales": { $push: "$sales" } } }
])
Results
Enterprise replSet [primary] barrydb> db.items.aggregate([
... { $match: { "sales.status": 2} },
... { $unwind: "$sales" },
... { $sort: { "item": 1, "sales.soldDate": 1} },
... { $group: { "_id": "$_id", "item": { $first: "$item" }, "sales": { $push: "$sales" } } }
... ])
[
{
_id: ObjectId("617064519be05d9f1cbab346"),
item: 1,
sales: [
{ soldDate: ISODate("2021-10-04T00:00:00.000Z"), status: 1 },
{ soldDate: ISODate("2021-10-05T00:00:00.000Z"), status: 2 }
]
},
{
_id: ObjectId("617064519be05d9f1cbab348"),
item: 3,
sales: [
{ soldDate: ISODate("2021-06-01T00:00:00.000Z"), status: 3 },
{ soldDate: ISODate("2021-06-07T00:00:00.000Z"), status: 1 },
{ soldDate: ISODate("2021-06-12T00:00:00.000Z"), status: 2 }
]
}
]
But, to be complete, here is a solution if you want the documents sorted (and the array not necessarily sorted).
Solution (Documents sorted)
db.items.aggregate([
{ $match: { "sales.status": 2} },
{ $sort: { "sales.soldDate": 1} }
])
Results
Enterprise replSet [primary] barrydb> db.items.aggregate([
... { $match: { "sales.status": 2} },
... { $sort: { "sales.soldDate": 1} }
... ])
[
{
_id: ObjectId("617064519be05d9f1cbab348"),
item: 3,
sales: [
{ soldDate: ISODate("2021-06-01T00:00:00.000Z"), status: 3 },
{ soldDate: ISODate("2021-06-12T00:00:00.000Z"), status: 2 },
{ soldDate: ISODate("2021-06-07T00:00:00.000Z"), status: 1 }
]
},
{
_id: ObjectId("617064519be05d9f1cbab346"),
item: 1,
sales: [
{ soldDate: ISODate("2021-10-04T00:00:00.000Z"), status: 1 },
{ soldDate: ISODate("2021-10-05T00:00:00.000Z"), status: 2 }
]
}
]
EDIT - After re-reading, I believe you want only the documents where the record having a status of 2 also has the greatest date in the array.
Solution (only where the last sale has status 2 - docs and array unsorted)
db.items.aggregate([
{ $unwind: "$sales" },
{ $sort: { "item": 1, "sales.soldDate": -1} },
{ $group: { "_id": "$_id", "item": { $first: "$item" }, "sales": { $push: "$sales" } } },
{ $match : { "sales.0.status" : 2 } }
])
Results
Enterprise replSet [primary] barrydb> db.items.aggregate([
... { $unwind: "$sales" },
... { $sort: { "item": 1, "sales.soldDate": -1} },
... { $group: { "_id": "$_id", "item": { $first: "$item" }, "sales": { $push: "$sales" } } },
... { $match : { "sales.0.status" : 2 } }
... ])
[
{
_id: ObjectId("617064519be05d9f1cbab346"),
item: 1,
sales: [
{ soldDate: ISODate("2021-10-05T00:00:00.000Z"), status: 2 },
{ soldDate: ISODate("2021-10-04T00:00:00.000Z"), status: 1 }
]
},
{
_id: ObjectId("617064519be05d9f1cbab348"),
item: 3,
sales: [
{ soldDate: ISODate("2021-06-12T00:00:00.000Z"), status: 2 },
{ soldDate: ISODate("2021-06-07T00:00:00.000Z"), status: 1 },
{ soldDate: ISODate("2021-06-01T00:00:00.000Z"), status: 3 }
]
}
]
EDIT - Add Self Referencing Lookup
db.items.aggregate([
{ $unwind: "$sales" },
{ $sort: { "item": 1, "sales.soldDate": -1} },
{ $group: { "_id": "$_id", "item": { $first: "$item" }, "sales": { $push: "$sales" } } },
{ $match : { "sales.0.status" : 2 } },
{ $lookup : {
from: "items",
localField: "_id",
foreignField: "_id",
as: "results"
}
},
{ $unwind: "$results" },
{ $replaceRoot: { "newRoot": "$results" } }
])
With the self-referencing lookup we are treating MongoDB as a relational database. We find the documents that meet our requirements, but in doing so we have destroyed the original shape and content. By performing a lookup on the same records we can restore the shape but at a performance penalty.
Retain Copy
Rather than performing a lookup, which has a performance concern, a different approach is to leverage memory on the server: keep a copy of the original while moving through the pipeline, manipulating the working document to identify the desired records...
db.items.aggregate([
{ $addFields: { "_original": "$$ROOT" } },
{ $unwind: "$sales" },
{ $sort: { "item": 1, "sales.soldDate": -1} },
{ $group: { "_id": "$_id", "_original": { $first: "$_original" }, "sales_status": { $push: "$sales.status" } } },
{ $match : { "sales_status.0" : 2 } },
{ $replaceRoot: { "newRoot": "$_original" } }
])
In this example we keep a copy of the original in the field _original; once we have identified the records we want, we pivot the root back to _original. This may put pressure on the WiredTiger cache, as we are keeping a duplicate of all selected records in memory during the execution of the pipeline. A $lookup approach has this memory concern too. Two queries would eliminate the cache-pressure issue, but that behaves like a $lookup and would not perform as well.
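For completeness, the two-query approach would look roughly like this (a sketch for mongosh, reusing the same unwind/sort/group idea to find the matching _ids and then re-fetching the untouched documents):
// Pass 1: collect the _ids whose most recent sale has status 2
var ids = db.items.aggregate([
  { $unwind: "$sales" },
  { $sort: { "item": 1, "sales.soldDate": -1 } },
  { $group: { "_id": "$_id", "lastStatus": { $first: "$sales.status" } } },
  { $match: { "lastStatus": 2 } }
]).toArray().map(function(d) { return d._id; });

// Pass 2: fetch the original documents with their shape and array order intact
db.items.find({ _id: { $in: ids } })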
I want to find prev/next blog documents whose publish date is closest to the input document.
Below is the document structure.
Collection Examples (blog)
{
blogCode: "B0001",
publishDate: "2020-09-21"
},
{
blogCode: "B0002",
publishDate: "2020-09-22"
},
{
blogCode: "B0003",
publishDate: "2020-09-13"
},
{
blogCode: "B0004",
publishDate: "2020-09-24"
},
{
blogCode: "B0005",
publishDate: "2020-09-05"
}
If the input is blogCode = B0003
Expected output
{
blogCode: "B0005",
publishDate: "2020-09-05"
},
{
blogCode: "B0001",
publishDate: "2020-09-21"
}
How can I get this output? In SQL, it seems ROW_NUMBER can solve my problem; however, I can't find a way to achieve the same thing in MongoDB. An alternative might be the approach referenced in this answer (but it seems inefficient). Maybe mapReduce is another, better solution? I'm confused at the moment, please give me some help.
You can do it as follows:
We need to compare each existing date with the given date, so $facet is used to separate the input document from the rest.
The input should be a single document (e.g. B0003), so we just take the first element of the origin[] array to compare against the rest[] array.
$unwind flattens rest[].
$subtract gives the difference between the two dates.
$facet is used again to find the previous and next dates.
Then both are combined to get your expected result.
NOTE: The final array may contain anywhere from 0 to 2 elements. With the expected output you gave, you cannot tell whether a single element is the prev or the next date, so my suggestion is to add another field saying which one it is, as the Mongo playground shows.
[{
$facet: {
origin: [{
$match: { blogCode: 'B0001' }
}],
rest: [{
$match: {
$expr: {
$ne: ['$blogCode','B0001']
}
}
}]
}
}, {
$project: {
origin: {
$arrayElemAt: ['$origin',0]
},
rest: 1
}
}, {
$unwind: {path: '$rest'}
}, {
$project: {
diff: {
$subtract: [{ $toDate: '$rest.publishDate' },{ $toDate: '$origin.publishDate'}]
},
rest: 1,
origin: 1
}
}, {
$facet: {
prev: [{
$sort: {diff: -1}
},
{
$match: {
diff: {$lt: 0 }
}
},
{
$limit: 1
},
{
$addFields:{"rest.type":"PREV"}
}
],
next: [{
$sort: { diff: 1 }
},
{
$match: {
diff: { $gt: 0 }
}
},
{
$limit: 1
},
{
$addFields:{"rest.type":"NEXT"}
}
]
}
}, {
$project: {
combined: {
$concatArrays: ["$prev", "$next"]
}
}
}, {
$unwind: {
path: "$combined"
}
}, {
$replaceRoot: {
newRoot: "$combined.rest"
}
}]
Working Mongo playground
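For reference, with the sample documents above and B0001 as the input blogCode (as used in the pipeline), the output would be the closest earlier and later posts, tagged as suggested (ignoring _id):
{ blogCode: "B0003", publishDate: "2020-09-13", type: "PREV" },
{ blogCode: "B0002", publishDate: "2020-09-22", type: "NEXT" }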
Inspired by the solution varman proposed, I also found another way to solve my problem, using includeArrayIndex.
[
{
$sort: {
"publishDate": 1
},
},
{
$group: {
_id: 1,
root: {
$push: "$$ROOT"
}
},
},
{
$unwind: {
path: "$root",
includeArrayIndex: "rownum"
}
},
{
$replaceRoot: {
newRoot: {
$mergeObjects: [
"$root",
{
rownum: "$rownum"
}
]
}
}
},
{
$facet: {
currRow: [
{
$match: {
blogCode: "B0004"
},
},
{
$project: {
rownum: 1
}
}
],
root: [
{
$match: {
blogCode: {
$exists: true
}
}
},
]
}
},
{
$project: {
currRow: {
$arrayElemAt: [
"$currRow",
0
]
},
root: 1
}
},
{
$project: {
rownum: {
prev: {
$add: [
"$currRow.rownum",
-1
]
},
next: {
$add: [
"$currRow.rownum",
1
]
}
},
root: 1
}
},
{
$unwind: "$root"
},
{
$facet: {
prev: [
{
$match: {
$expr: {
$eq: [
"$root.rownum",
"$rownum.prev"
]
}
}
},
{
$replaceRoot: {
newRoot: "$root"
}
}
],
next: [
{
$match: {
$expr: {
$eq: [
"$root.rownum",
"$rownum.next"
]
}
}
},
{
$replaceRoot: {
newRoot: "$root"
}
}
],
}
},
{
$project: {
prev: {
$arrayElemAt: [
"$prev",
0
]
},
next: {
$arrayElemAt: [
"$next",
0
]
},
}
},
]
Working Mongo playground
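As a side note, on MongoDB 5.0+ the ROW_NUMBER-style numbering can be produced directly with $setWindowFields instead of the $group/$push/$unwind steps; a minimal sketch (assuming the collection is named blog, as in the question):
db.blog.aggregate([
  {
    $setWindowFields: {
      sortBy: { publishDate: 1 },
      output: {
        rownum: { $documentNumber: {} }   // position of each doc in publishDate order
      }
    }
  }
])
The prev/next selection against rownum then works the same way as in the pipelines above.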
I have a collection with documents similar to the following format:
{
departure:{name: "abe"},
arrival:{name: "tom"}
},
{
departure:{name: "bob"},
arrival:{name: "abe"}
}
And to get output like so:
{
name: "abe",
departureCount: 1,
arrivalCount: 1
},
{
name: "bob",
departureCount: 1,
arrivalCount: 0
},
{
name: "tom",
departureCount: 0,
arrivalCount: 1
}
I'm able to get the counts individually by doing a query for the specific data like so:
db.sched.aggregate([
{
"$group":{
_id: "$departure.name",
departureCount: {$sum: 1}
}
}
])
But I haven't figured out how to merge the arrival and departure names into one document along with counts for both. Any suggestions on how to accomplish this?
You should use $map to split each doc into two, then $unwind and $group:
[
{
$project: {
dep: '$departure.name',
arr: '$arrival.name'
}
},
{
$project: {
f: {
$map: {
input: {
$literal: ['dep', 'arr']
},
as: 'el',
in : {
type: '$$el',
name: {
$cond: [{
$eq: ['$$el', 'dep']
}, '$dep', '$arr']
}
}
}
}
}
},
{
$unwind: '$f'
}, {
$group: {
_id: {
'name': '$f.name'
},
departureCount: {
$sum: {
$cond: [{
$eq: ['$f.type', 'dep']
}, 1, 0]
}
},
arrivalCount: {
$sum: {
$cond: [{
$eq: ['$f.type', 'arr']
}, 1, 0]
}
}
}
}, {
$project: {
_id: 0,
name: '$_id.name',
departureCount: 1,
arrivalCount: 1
}
}
]
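To make the $map step concrete: after the $unwind, each input document has become two documents; the first sample doc, for example, turns into roughly (ignoring _id):
{ f: { type: "dep", name: "abe" } }
{ f: { type: "arr", name: "tom" } }
The $group stage then simply counts, per name, how many "dep" and how many "arr" entries it sees.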
I have data like the following:
{
"order_id" : 1234567,
"order_pay_time" : 1437373297,
"pay_info" : [
{
"pay_type" : 0,
"pay_time" : 1437369046
},
{
"pay_type" : 0,
"pay_time" : 1437369123
},
{
"pay_type" : 0,
"pay_time" : 1437369348
}
]}
What I want to get is the orders whose last payment is of type 1, but $elemMatch just matches documents where an element with pay_type: 1 exists anywhere in the list. How can I match the orders whose last payment has "pay_type": 1?
You can use aggregation to get the expected output. The query will be like the following:
db.collection.aggregate({
$unwind: "$pay_info"
}, {
$match: {
"pay_info.pay_type": 1
}
}, {
$group: {
_id: "$_id",
"pay_info": {
$push: "$pay_info"
},
"order_id": {
$first: "$order_id"
},
"order_pay_time": {
$first: "$order_pay_time"
}
}
})
Moreover, if you want the latest pay_info.pay_time, you can sort it in descending order with a limit of 1, somewhat like the following:
db.collection.aggregate({
$unwind: "$pay_info"
}, {
$match: {
"pay_info.pay_type": 1
}
}, {
$sort: {
"pay_info.pay_time": -1
}
}, {
$limit: 1
}, {
$group: {
_id: "$_id",
"pay_info": {
$push: "$pay_info"
},
"order_id": {
$first: "$order_id"
},
"order_pay_time": {
$first: "$order_pay_time"
}
}
})
Edit
Also, you can use $redact to avoid the $unwind, like the following:
db.collection.aggregate({
$match: {
"pay_info": {
$elemMatch: {
"pay_type": 1
}
}
}
}, {
$sort: {
"pay_info.pay_time": -1
}
}, {
$limit: 1
}, {
$redact: {
$cond: {
if: {
$eq: [{
"$ifNull": ["$pay_type", 1]
}, 1]
},
then: "$$DESCEND",
else: "$$PRUNE"
}
}
}).pretty()
Just found this thread for a similar problem I've had.
I ended up doing this, maybe that will be of interest to someone:
db.collection.find({
$where: function(){
return this.pay_info[this.pay_info.length-1].pay_type === 1
}
})
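If you are on MongoDB 3.6 or newer and want to avoid server-side JavaScript, the same check can be written with $expr instead of $where (a sketch, assuming the last array element is always the most recent payment):
db.collection.find({
  $expr: {
    $eq: [ { $arrayElemAt: ["$pay_info.pay_type", -1] }, 1 ]
  }
})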