MongoDB Aggregation query to select document with min and max dates - mongodb

I have a collection of documents that contain 3 fields: DateTime, Score, and Name. I would like to limit the data so that only relevant information is displayed in MongoDB Charts. Basically, what I need is to select the document with the minimum date and the document with the maximum date, and pass this information to MongoDB Charts. Can you please suggest the best way to do this?
Example document:
{
"_id": {
"$oid": "62f172b99d3a18179cee4c4c"
},
"Name": "pc",
"Score": 46,
"DateTime": {
"$date": {
"$numberLong": "1659646800000"
}
}
},
{
"_id": {
"$oid": "62f172b99d3a18179cee4c4c"
},
"Name": "pc",
"Score": 46,
"DateTime": {
"$date": {
"$numberLong": "1649646800000"
}
}
}
There are a number of these kinds of documents, with different values taken at different times. I was able to write a simple query that sorts by date and limits the result to one entry, which returns only the document with either the minimum or the maximum date. The expected output for me would be to return both of them.

With MongoDB v5.0+, you can use $setWindowFields to compute a rank according to ascending and descending sort of DateTime. Then pick those with rank: 1 to choose the max/min DateTime.
// Return the documents holding the earliest and the latest DateTime.
db.collection.aggregate([
  // Rank every document from oldest to newest; the oldest gets minRank 1.
  {
    $setWindowFields: {
      partitionBy: null,
      sortBy: { DateTime: 1 },
      output: { minRank: { $rank: {} } }
    }
  },
  // Rank again from newest to oldest; the newest gets maxRank 1.
  {
    $setWindowFields: {
      partitionBy: null,
      sortBy: { DateTime: -1 },
      output: { maxRank: { $rank: {} } }
    }
  },
  // Keep only the documents ranked first in either direction.
  {
    $match: {
      $or: [{ minRank: 1 }, { maxRank: 1 }]
    }
  },
  // cosmetics: drop the helper rank fields from the output
  { $unset: ["minRank", "maxRank"] }
])
Here is the Mongo Playground for your reference.

Related

MongoDB Aggregate Query to find the documents with missing values

I am having a huge collection of objects where the data is stored for different employees.
{
"employee": "Joe",
"areAllAttributesMatched": false,
"characteristics": [
{
"step": "A",
"name": "house",
"score": "1"
},
{
"step": "B",
"name": "car"
},
{
"step": "C",
"name": "job",
"score": "3"
}
]
}
There are cases where the score for an object is completely missing and I want to find out all these details from the database.
In order to do this, I have written the following query, but seems I am going wrong somewhere due to which it is not displaying the output.
I want the data in the following format for this query, so that it is easy to find out which employee is missing the score for which step and which name.
// List every (employee, name, step) whose characteristic has no score field.
db.collection.aggregate([
  // One document per element of the characteristics array.
  { $unwind: "$characteristics" },
  // Keep only the elements where the score field is absent.
  { $match: { "characteristics.score": { $exists: false } } },
  // Flatten the result to the three fields of interest.
  {
    $project: {
      _id: 0,
      employee: 1,
      name: "$characteristics.name",
      step: "$characteristics.step"
    }
  }
])
You need to use $exists to check the existence
playground
You can use $ifNull to handle both cases of 1. the score field is missing 2. score is null.
// Collect every (employee, name, step) whose score is missing or null
// into a single result document.
db.collection.aggregate([
  // One document per element of the characteristics array.
  { $unwind: "$characteristics" },
  // $ifNull maps a missing score to null, so one equality test against
  // null covers both the "missing" and the "explicitly null" case.
  {
    $match: {
      $expr: {
        $eq: [{ $ifNull: ["$characteristics.score", null] }, null]
      }
    }
  },
  // Gather all matches into one array under a constant group key.
  {
    $group: {
      _id: null,
      documents: {
        $push: {
          employee: "$employee",
          name: "$characteristics.name",
          step: "$characteristics.step"
        }
      }
    }
  },
  // Hide the constant group key from the output.
  { $project: { _id: 0 } }
])
Here is the Mongo playground for your reference.

MongoDB Express filter results on count of nested list of ints

I'm new to MongoDB, so I'm having some difficulties filtering my collections the way I need.
I have this collection
[
{
"id": "sdfsdfsdf",
"key": "tryrtyrty",
"createdAt": "2017-01-28T01:22:14.398Z",
"counts": [
170
],
"value": "Something"
},
{
"id": "hjmhjhjm",
"key": "yuiyuiyui",
"createdAt": "2017-01-28T01:22:14.398Z",
"counts": [
150,
160
],
"value": "Something"
}
]
I want to filter by range of dates (min-max date) and range of counts, meaning I want to give a min and max value for the totalCount of the sum in the field. Example, I would like to filter results whose min counts sum is 200 and max 400. This would only return the second result (the sum is 310, while the first result the sum is 170).
Right now I have this:
// The asker's attempt, kept exactly as posted: it projects totalCount as the
// $sum of a $filter over counts. Inside the filter condition, "$sum" is
// written as a quoted string, i.e. a field path, not the sum of the array.
db.collection.aggregate([
{
$project: {
totalCount: {
$sum: {
"$filter": {
"input": "$counts",
"as": "bla",
"cond": {
"$gte": [
"$sum", // I think the error is here; I don't know how to reference the sum of the list
300 // I want records whose count sum is more than this value
]
}
}
}
}
}
}
])
This returns all the records with totalCount set to 0, which is not what I want. I would like only the records matching the count condition, with the correct totalCount (and eventually matching the dates as well).
[
{
"_id": ObjectId("5a934e000102030405000000"),
"totalCount": 0
},
{
"_id": ObjectId("5a934e000102030405000001"),
"totalCount": 0
}
]
Desired output
[
{
"_id": ObjectId("5a934e000102030405000001"),
"totalCount": 310,
"key": "yuiyuiyui",
"createdAt": "2017-01-28T01:22:14.398Z"
}
]
Any help would be greatly appreciated. Even more if it comes with both dates and count filter.
You should not use $filter, as it isn't suitable for this scenario.
Stages:
$set - Create totalCounts by applying $sum to all elements in the counts array.
$match - Filter the documents that have totalCounts within the range.
$unset - Remove fields to tidy up the output document.
db.collection.aggregate([
  // Add totalCounts: the sum of every number in the counts array.
  { $set: { totalCounts: { $sum: "$counts" } } },
  // Keep documents whose total lies in the inclusive range 200..400.
  { $match: { totalCounts: { $gte: 200, $lte: 400 } } },
  // Drop the fields that are not wanted in the output.
  { $unset: ["counts", "id"] }
])
Sample Mongo Playground
For the date range filter, you need the $expr and $and operators, as below:
// Template for the $match stage: keeps the totalCounts range check and adds a
// date range check on createdAt (converted from string with $toDate).
// The two /* Date from */ and /* Date to */ placeholders must be replaced
// with the actual lower and upper bound dates before the stage can be used.
{
$match: {
totalCounts: {
$gte: 200,
$lte: 400
},
$expr: {
$and: [
{
$gte: [
{
"$toDate": "$createdAt"
},
/* Date from */
]
},
{
$lte: [
{
"$toDate": "$createdAt"
},
/* Date to */
]
}
]
}
}
}
Sample Mongo Playground (with date range filter)

Mongodb groupby range of date and id

I have a database
{
"_id": "222jMQDEHuHXTuDeF",
"customer_id": "QfdAFubKS9ytdbhbq",
"createdDate": {
"$date": "2020-07-27T08:19:40.791Z"
}
},
{
"_id": "278jKLDEHuHXItDeF",
"customer_id": "HtdAFubJS8ytdnjbe",
"createdDate": {
"$date": "2020-07-26T08:19:40.791Z"
}
},
{
"_id": "128lRLDEHuHXItPhy",
"customer_id": "KodATubJS8yyqkjbe",
"createdDate": {
"$date": "2020-07-25T08:19:40.791Z"
}
}
I need to get data of the previous week where current date is the end date and group by date and customer_id and get the count of customer_id in mongodb.
You can compare as follows. For convenience I have added a previouseWeekStart field, but you can compute the value directly inside the $match stage instead of using $addFields. This should work, but I can't show a demo in Mongo Playground since the expression uses the subtraction operator (-) to calculate the previous date.
// Count documents per customer per calendar day over the last seven days.
db.collection.aggregate([
  {
    // Helper field: the instant exactly 7 days (expressed in milliseconds)
    // before now, evaluated by the shell when the pipeline is built.
    $addFields: {
      previouseWeekStart: new Date(new Date() - 7 * 60 * 60 * 24 * 1000)
    }
  },
  {
    // Keep documents created strictly between one week ago and now.
    $match: {
      $expr: {
        $and: [
          { $gt: ["$createdDate", "$previouseWeekStart"] },
          { $lt: ["$createdDate", new Date()] }
        ]
      }
    }
  },
  {
    $group: {
      _id: {
        cusId: "$customer_id",
        // Group on the calendar day (UTC by default) rather than on the full
        // timestamp; grouping on "$createdDate" itself would place every
        // distinct millisecond value into its own group.
        date: { $dateToString: { format: "%Y-%m-%d", date: "$createdDate" } }
      },
      // Number of documents for this customer on this day.
      count: { $sum: 1 }
    }
  }
])

mongodb aggregation pipeline - convert array entries to subdocument

I'm working on a mongodb aggregation pipeline. I currently have the following document:
{
"data": [
{ "type": "abc", "price": 25000, "inventory": 15 },
{ "type": "def", "price": 8000, "inventory": 150 }
]
}
And I would like to turn it in:
{
"abc": { "price": 25000, "inventory": 15 },
"def": { "price": 8000, "inventory": 150 }
}
I could do it field by field with a $project stage, but obviously my real example has way more fields than this simple example... And I also can't be certain which values could appear in type.
Since data is an array, you could use an aggregation pipeline similar to:
$unwind to split those into separate documents each containing a single item
$project to change it from {type:x, price:y, inventory:z} to [x,[{price:y, inventory:z}]]
$group to collect the items back to a single array of arrays
$arrayToObject to convert the array of arrays to [{x:{price:y,inventory:z}},...]
If you need more detail, I can see about working up a sample when I have a bit more time.
Thanks to @Joe, I managed to create a solution:
// Turn the data array into one sub-document keyed by each entry's type.
db.collection.aggregate([
  // One document per entry of the data array.
  { $unwind: "$data" },
  {
    // Rewrite each entry as a [key, value] pair: [type, {price, inventory}].
    $project: {
      data: [
        "$data.type",
        { price: "$data.price", inventory: "$data.inventory" }
      ]
    }
  },
  {
    // Gather the pairs that came from the same original document.
    $group: {
      _id: "$_id",
      pairs: { $push: "$data" }
    }
  },
  {
    // Build the final object from the collected [key, value] pairs.
    $replaceRoot: {
      newRoot: { $arrayToObject: "$pairs" }
    }
  }
])
Result:
[
{
"abc": {
"inventory": 15,
"price": 25000
},
"def": {
"inventory": 150,
"price": 8000
}
}
]
Mongo Playground

Schema design for MongoDB pre-aggregated reports

I'm following the official MongoDB docs (http://docs.mongodb.org/ecosystem/use-cases/pre-aggregated-reports/) about pre-aggregated reports. According to the tutorial, a pre-aggregated document should look like this:
{
_id: "20101010/site-1/apache_pb.gif",
metadata: {
date: ISODate("2000-10-10T00:00:00Z"),
site: "site-1",
page: "/apache_pb.gif" },
hourly: {
"0": 227850,
"1": 210231,
...
"23": 20457 },
minute: {
"0": {
"0": 3612,
"1": 3241,
...
"59": 2130 },
"1": {
"0": ...,
},
...
"23": {
"59": 2819 }
}
}
The thing is that I'm currently using this approach, and I already have some data stored this way. But now I want to add another dimension in the metadata subdocument and I was reconsidering the whole thing.
My question is: is there a reason to build the _id attribute from the same information stored in the metadata attribute? Wouldn't it be enough to create a (unique) compound index on metadata and use an ObjectId for the _id key?
Thanks!
Other way ;)
You can create simple collection:
{
"ts": "unix timestamp",
"site": "site-1",
"page": "/apache_pb.gif"
}
This collection will have very good insert performance,
and you can use a complex aggregate query (aggregating by any time grain):
// Buckets records by "grain" (ts divided by 3600, fractional part removed)
// and outputs one { grain, tsum } document per bucket, sorted by grain.
db.test.aggregate(
[
{
"$project": {
"ts": 1,
"_id": 0,
// grain = (ts / 3600) - ((ts / 3600) mod 1), i.e. the whole-number part of
// ts / 3600. Presumably ts is in seconds, making this an hour index - confirm.
"grain": {
"$subtract": [
{
"$divide": [
"$ts",
3600
]
},
{
"$mod": [
{
"$divide": [
"$ts",
3600
]
},
1
]
}
]
},
"site": 1,
"page": 1
}
},
{
// First grouping: one document per distinct (site, page, grain) combination.
"$group": {
"_id": {
"site": "$site",
"page": "$page",
"grain": "$grain",
}
}
},
{
// Second grouping: per grain, add 1 for each document from the previous stage.
// NOTE(review): because the previous stage already collapsed duplicates, tsum
// is the number of distinct (site, page) pairs per grain, not the number of
// input records - confirm this is the intended statistic.
"$group": {
"tsum": {
"$sum": 1
},
"_id": {
"grain": "$_id.grain"
}
}
},
{
// Flatten the group key so the output is { tsum, grain }.
"$project": {
"tsum": "$tsum",
"_id": 0,
"grain": "$_id.grain"
}
},
{
// Ascending order by grain.
"$sort": {
"grain": 1
}
}
])
This aggregates your statistics by one hour (3600 seconds in this example).
IMHO this is a simpler and more manageable solution, without a complex data model and with good performance (don't forget about the index).