Pass data from Sunday to week before in MongoDB - mongodb

My aggregation gets the data of documents per week. In this case I'm getting data from days 18 to 24 of may:
{ "_id" : 20, "count" : 795 }
{ "_id" : 21, "count" : 221 }
Since 'week' in mongo starts from sundays, the data from sundays is creating a new week (In this case is 21). Is there any way I can transfer the data from Sundays to the week before or backwards?
The result would be:
{ "_id" : 20, "count" : 1016 }
Aggregation:
[{
$match: {
start_date: {
$gte: ISODate('2020-05-18T00:00:01'),
$lte: ISODate('2020-05-24T23:59:59')
}
}
}, {
$project: {
week: {
$week: '$start_date'
},
solved: '$solved',
survey: '$survey'
}
}, {
$group: {
_id: '$week',
count: {
$sum: 1
}
}
}, {
$sort: {
_id: 1
}
}]
UPDATE:

I think the below query will do the trick.
The timezone key in if condition can be removed if your week-wise-sort is independent of the time zone of ISODate value in DB
db.<Collection-Name>.aggregate([
{
$match: {
start_date: {
$gte: ISODate('2020-05-18T00:00:01'),
$lte: ISODate('2020-05-24T23:59:59')
}
}
}, {
$project: {
week: {
"$cond": {
"if": {"$eq": [{"$dayOfWeek": {"date": "$start_date", "timezone": "-0500"}}, 1]},
"then": {"$subtract": [{"$week": '$start_date'}, 1]},
"else": {"$week": '$start_date'}
}
},
solved: '$solved',
survey: '$survey'
}
}, {
$group: {
_id: '$week',
count: {
$sum: 1
}
}
}, {
$sort: {
_id: 1
}
}
])

Related

Mongodb aggregation , group by items for the last 5 days

I'm trying to get the result in some form using mongodb aggregation.
here is my sample document in the collection:
[{
"_id": "34243243243",
"workType": "TESTWORK1",
"assignedDate":ISODate("2021-02-22T00:00:00Z"),
"status":"Completed",
},
{
"_id": "34243243244",
"workType": "TESTWORK2",
"assignedDate":ISODate("2021-02-21T00:00:00Z"),
"status":"Completed",
},
{
"_id": "34243243245",
"workType": "TESTWORK3",
"assignedDate":ISODate("2021-02-20T00:00:00Z"),
"status":"InProgress",
}...]
I need to group last 5 days data in an array by workType count having staus completed.
Expected result:
{_id: "TESTWORK1" , value: [1,0,4,2,3] ,
_id: "TESTWORK2" , value: [3,9,,3,5],
_id : "TESTWORK3", value: [,,,3,5]}
Here is what I'm trying to do, but not sure how to get the expected result.
db.testcollection.aggregate([
{$match:{"status":"Completed"}},
{$project: {_id:0,
assignedSince:{$divide:[{$subtract:[new Date(),$assignedDate]},86400000]},
workType:1
}
},
{$match:{"assignedSince":{"lte":5}}},
{$group : { _id:"workType", test :{$push:{day:"$assignedSince"}}}}
])
result: {_id:"TESTWORK1": test:[{5},{3}]} - here I'm getting the day , but I need the count of the workTypes on that day.
Is there any easy way to do this? Any help would be really appreciated.
Try this:
db.testcollection.aggregate([
{
$match: { "status": "Completed" }
},
{
$project: {
_id: 0,
assignedDate: 1,
assignedSince: {
$toInt: {
$divide: [{ $subtract: [new Date(), "$assignedDate"] }, 86400000]
}
},
workType: 1
}
},
{
$match: { "assignedSince": { "$lte": 5 } }
},
{
$group: {
_id: {
workType: "$workType",
assignedDate: "$assignedDate"
},
count: { $sum: 1 }
}
},
{
$group: {
_id: "$_id.workType",
values: { $push: "$count" }
}
}
]);

MongoDB - convert string to timestamp, group by hour

I have the following documents stored in a collection:
{
"REQUESTTIMESTAMP" : "26-JUN-19 01.34.10.095000000 AM",
"UNHANDLED_INTENT" : 0,
"USERID" : "John",
"START_OF_INTENT_SKILL_CONVERSATION" : 0,
"PROPERTYCODE" : ""
}
I want to group this by the hour(which we will get from 'REQUESTTIMESTAMP')
Earlier, I had this document stored in the collection in a different way, where I had a separate field for hours, and used that hours field to group:
Previous aggregation query :
collection.aggregate([
{'$match': query}, {
'$group': {
"_id": {
"hour": "$hour",
"sessionId": "$sessionId"
}
}
}, {
"$group": {
"_id": "$_id.hour",
"count": {
"$sum": 1
}
}
}
])
Previous collection structure:
{
"timestamp" : "1581533210921",
"date" : "12-02-2020",
"hour" : "13",
"month" : "02",
"time" : "13:46:50",
"weekDay" : "Wednesday",
"__v" : 0
}
How can I do the above same Previous aggregation query with the new document structure (After extracting hours from 'REQUESTTIMESTAMP' field?)
You should convert your timestamp to Date object then take hour from your date object.
db.collection.aggregate([{
'$match': query
}, {
$project: {
date: {
$dateFromString: {
dateString: '$REQUESTTIMESTAMP',
format: "%m-%d-%Y" //This should be your date format
}
}
}
}, {
$group: {
_id: {
hour: {
$hour: "$date"
}
}
}
}])
Problem is months names are not supported by MongoDB. Either you write a lot of code or you use libraries like moments.js. First update your REQUESTTIMESTAMP to proper Date object, then you can group it.
db.collection.find().forEach(function (doc) {
var d = moment(doc.REQUESTTIMESTAMP, "DD-MMM-YY hh.mm.ss.SSS a");
db.collection.updateOne(
{ _id: doc._id },
{ $set: { date: d.toDate() } }
);
})
db.collection.aggregate([
{
$group: {
_id: { $hour: "$date" },
count: { $sum: 1 }
}
}
])
In case if you're not able to update DB with actual date field & still wanted to proceed with existing format, try this query it will add hour field extracted from given string field REQUESTTIMESTAMP :
Query :
db.collection.aggregate([
{
$addFields: {
hour: {
$let: {
/** split string into three parts date + hours + AM/PM */
vars: { hour: { $slice: [{ $split: ["$REQUESTTIMESTAMP", " "] }, 1, 2] } },
in: {
$cond: [{ $in: ["AM", "$$hour"] }, // Check AM exists in array
{ $toInt: { $substr: [{ $arrayElemAt: ["$$hour", 0] }, 0, 2] } }, // If yes then return int of first 2 letters of first element in hour array
{ $add: [{ $toInt: { $substr: [{ $arrayElemAt: ["$$hour", 0] }, 0, 2] } }, 12] } ] // If PM add 12 to int of first 2 letters of first element in hour array
}
}
}
}
}
])
Test : MongoDB-Playground

Date range filter in mongodb Groupby aggregation query

I want to filter each group aggregation by different date-range.For dayMonthStatus I want to filter by $currentDate - 1 , for monthStatus - current monthnumber, for weekStatus
- current weeknumber.
Sample json:
{
"createdAt" : "2019-10-02T04:55:13.472Z",
"Day-month" : "2-10",
"Month" : NumberInt(10),
"Year" : NumberInt(2019),
"Week" : NumberInt(39)
}
I have tried $cond operator but get only blank values or error "errmsg" : "An object representing an expression must have exactly one field", below is the groupby aggregation code on which I want to apply filter.
db.collection.aggregate([
// current aggregation stages,
{
$facet: {
"dayMonthStatus": [
{ $group: { _id: { status: "$Ctrans.status", "dayMonth": "$Day-month" }, count: { $sum: 1 } } }
],
"monthStatus": [
{ $group: { _id: { status: "$Ctrans.status", "month": "$Month" }, count: { $sum: 1 } } }
],
"yearStatus": [
{ $group: { _id: { status: "$Ctrans.status", "year": "$Year" }, count: { $sum: 1 } } }
],
"weekStatus": [
{ $group: { _id: { status: "$Ctrans.status", "week": "$Week" }, count: { $sum: 1 } } }
]
}
}
])
I have tried $match in groupby in below format:
"dayMonthStatus": [
{ $group: { _id: { status: "$Customer-transaction.status", "dayMonth": "$Day-month" }, count: { $sum: 1 },
} },{ $match: {"dayMonth": { '$gte': "1-10", '$lt': "3-10"}}}
]

Mongodb aggregate $group and count for date ranges

I have documents like these:
{
"_id" : ObjectId("5cc80389c723e046f504b5a9"),
"adddress" : "string",
"checkIn" : "2019-04-30T08:12:57.909Z"
},
{
"_id" : ObjectId("5cc995f5a6f3eb7c483b019f"),
"adddress" : "string",
"checkIn" : "2019-05-01T12:49:57.561Z"
}
I have tried aggrgation like this:
var start = new Date("2019-04-30T08:12:57.909Z");
var end = new Date("2019-05-01T12:49:57.561Z");
var pipeline = [
{
$match: {
checkIn: {
$gte: start,
$lte: end
}
}
},
{
$group: {
_id: {
year: {
$year: "$checkIn"
},
month: {
$month: "$checkIn"
},
day: {
$dayOfYear: "$checkIn"
}
},
count: {
$sum: 1
}
}
}];
db.collections.aggregate(pipeline).toArray()
Is it possible to count them by checkIn date and get result like this:
"_id": [{
"checkIn": "2019-03-15T00:00:00Z",
"count": 4
}, {
"checkIn": "2019-04-30T00:00:00Z",
"count": 1
}, {
"checkIn": "2019-05-10T00:00:00Z",
"count": 1
}],
The result is shown the total number of the day.
{$project: {
checkIn: { $dateToString: { format: '%Y-%m-%d', date: '$checkIn' } }
}},
{$group: {
_id: '$checkIn',
checkIn: {$first: '$checkIn'},
count: {$sum: 1}
}},
{$sort: {checkIn: 1}}
Try this: I have tested this query and its working.
db.sample.aggregate([{
$addFields: {
date: {
$dateFromString: {
dateString: "$checkIn"
}
}
}
},{
$match: {
date: {
$gte: start,
$lte: end
}
}
},
{
$addFields: {
dateString: {
$dateToString: {
format: "%Y-%m-%d",
date: "$date"
}
}
}
},
{
$group: {
_id: "$dateString",
count: {
$sum: 1
}
}
}
]);

MongoDb aggregate pipeline with multiple groupings

I'm trying to get my head around an aggregate pipeline in MongoDb with multiple groups.
I have the following data: https://gist.github.com/bomortensen/36e6b3fbc987a096be36a66bbfe30d82
Expected data would be: https://gist.github.com/bomortensen/7b220df1f1da83be838acfb2ed79a2ee (total quantity sum based on highest version, hourly)
I need to write a query which does the following:
Group the data by the field MeterId to get unique meter groups.
In each group I then need to group by the StartDate's year, month, day and hour since all objects StartDate is stored as quarters, but I need to aggregate them into whole hours.
Finally, I need to only get the highest version from the Versions array by VersionNumber
I've tried the following query, but must admit I'm stuck:
mycollection.aggregate([
{ $group: {
_id : { ediel: "$_id.MeterId", start: "$_id.StartDate" },
versions: { $push: "$Versions" }
}
},
{ $unwind: { path: "$versions" } },
{ $group: {
_id: {
hour: { $hour: "$_id.start.DateTime" },
key: "$_id"
},
quantitySum: { $sum: "$Versions.Quantity" }
}
},
{ $sort: { "_id.hour": -1 } }
]);
Does anyone know how I should do this? :-)
This would give :
1 $project : get $hour from date, create a maxVersion field per record
1 $unwind to remove the Versions array
1 $project to add a keep field that will contain a boolean to check if the record should be kept or not
1 $match that match only higher version number eg keep == true
1 $group that group by id/hour and sum the quantity
1 $project to set up your required format
Query is :
db.mycollection.aggregate([{
$project: {
_id: 1,
Versions: 1,
hour: {
"$hour": "$_id.StartDate"
},
maxVersion: { $max: "$Versions.VersionNumber" }
}
}, {
$unwind: "$Versions"
}, {
$project: {
_id: 1,
Versions: 1,
hour: 1,
maxVersion: 1,
keep: { $eq: ["$Versions.VersionNumber", "$maxVersion"] }
}
}, {
$match: { "keep": true }
}, {
$group: {
_id: { _id: "$_id.MeterId", hour: "$hour" },
StartDate: { $first: "$_id.StartDate" },
QuantitySum: { $sum: "$Versions.Quantity" }
}
}, {
$project: {
_id: { _id: "$_id._id", StartDate: "$StartDate" },
hour: "$_id.hour",
QuantitySum: 1
}
}])
In your example output you take into account only the first higher versionNumber, You have { "VersionNumber" : 2, "Quantity" : 7.5 } and { "VersionNumber" : 2, "Quantity" : 8.4 } for hour 2 and id 1234 but you only take { "VersionNumber" : 2, "Quantity" : 7.5 }
I dont know if this is intended or not but in this case you want to take only the first MaxVersion number. After the $match, I added :
1 $group that push versions previously filter in an array
1 $project that $slice this array to take only the first element
1 $unwind to remove this array (which contains only one elemement)
The query that match your output is :
db.mycollection.aggregate([{
$project: {
_id: 1,
Versions: 1,
hour: {
"$hour": "$_id.StartDate"
},
maxVersion: { $max: "$Versions.VersionNumber" }
}
}, {
$unwind: "$Versions"
}, {
$project: {
_id: 1,
Versions: 1,
hour: 1,
maxVersion: 1,
keep: { $eq: ["$Versions.VersionNumber", "$maxVersion"] }
}
}, {
$match: { "keep": true }
}, {
$group: {
_id: { _id: "$_id.MeterId", StartDate: "$_id.StartDate" },
Versions: { $push: "$Versions" },
hour: { "$first": "$hour" }
}
}, {
$project: {
_id: 1,
hour: 1,
Versions: { $slice: ["$Versions", 1] }
}
}, {
$unwind: "$Versions"
}, {
$sort: {
_id: 1
}
}, {
$group: {
_id: { _id: "$_id._id", hour: "$hour" },
StartDate: { $first: "$_id.StartDate" },
QuantitySum: { $sum: "$Versions.Quantity" }
}
}, {
$project: {
_id: { _id: "$MeterId._id", StartDate: "$StartDate" },
Hour: "$_id.hour",
QuantitySum: 1
}
}])
Output is :
{ "_id" : { "MeterId" : "4567", "StartDate" : ISODate("2016-09-20T03:00:00Z") }, "QuantitySum" : 25.9, "Hour" : 3 }
{ "_id" : { "MeterId" : "4567", "StartDate" : ISODate("2016-09-20T02:00:00Z") }, "QuantitySum" : 25.9, "Hour" : 2 }
{ "_id" : { "MeterId" : "1234", "StartDate" : ISODate("2016-09-20T03:00:00Z") }, "QuantitySum" : 25.9, "Hour" : 3 }
{ "_id" : { "MeterId" : "1234", "StartDate" : ISODate("2016-09-20T02:00:00Z") }, "QuantitySum" : 25.9, "Hour" : 2 }
Sorry, I just dont find a straight forward way to round the hour. You can try the following. You will unwind the versions, so you can apply the grouping to collect the max version, push the versions for the next step, which is to project to filter the matching records with max version and final project to sum the max versions quantity. Right now start dt is the min from the group. You should be fine as long as you have versions at the top of the hour.
db.collection.aggregate([{
$unwind: {
path: "$Versions"
}
}, {
$group: {
_id: {
MeterId: "$_id.MeterId",
start: {
$hour: "$_id.StartDate"
}
},
startDate: {
$min: "$_id.StartDate"
},
maxVersion: {
$max: "$Versions.VersionNumber"
},
Versions: {
$push: "$Versions"
}
}
}, {
$sort: {
"_id.start": -1
}
}, {
$project: {
_id: {
MeterId: "$_id.MeterId",
StartDate: "$startDate"
},
hour: "$_id.start",
Versions: {
$filter: {
input: "$Versions",
as: "version",
cond: {
$eq: ["$maxVersion", "$$version.VersionNumber"]
}
}
}
}
}, {
$project: {
_id: 1,
hour: 1,
QuantitySum: {
$sum: "$Versions.Quantity"
}
}
}]);
Sample Output
{
"_id": {
"MeterId": "1234",
"StartDate": ISODate("2016-09-20T02:00:00Z")
},
"QuantitySum": 15,
"hour": 2
}