Mongo aggregation: Count documents based on array field matching condition - mongodb

I am trying to collects some statistics from a mongo collection using an aggregation pipeline but can not seem to solve my issue.
I have a collection of documents looking like this.
{
_id: “36723678126”,
dates: [ 2020-03-10T10:17:48.799Z, 2020-03-10T08:46:50.347Z, 2019-07-11T13:16:17.294Z ]
}
I would like count the number of documents that meet the following conditions
At least one date in the last 30 days
No dates between 60 and 30 days ago
At least one date before 60 days ago
Would really appreciate any help! :)

This should solve the problem (https://mongoplayground.net/p/B3LbEo8UaHA):
Test data:
[
{
_id: 1,
dates: [
ISODate("2020-03-10T10:17:48.799Z"),
ISODate("2020-03-10T08:46:50.347Z"),
ISODate("2019-07-11T13:16:17.294Z")
]
},
{
_id: 2,
dates: [
ISODate("2020-04-10T10:17:48.799Z"),
ISODate("2020-05-10T08:46:50.347Z"),
ISODate("2019-10-11T13:16:17.294Z")
]
}
]
Query:
db.collection.aggregate([
// create the new fields based on the date rules and count occurences of array
{
$addFields: {
"last_30_days": {
$sum: {
$map: {
"input": "$dates",
"as": "d",
"in": {
$cond: {
"if": {
$lte: [
"$$d",
{
$subtract: [
"$$NOW",
2592000000 // 30 days
]
}
]
},
"then": 1,
"else": 0
}
}
}
}
},
"between_30_60": {
$sum: {
$map: {
"input": "$dates",
"as": "d",
"in": {
$cond: {
"if": {
$and: [
{
$lt: [
"$$d",
{
$subtract: [
"$$NOW",
2592000000 // days
]
}
]
},
{
$gt: [
"$$d",
{
$subtract: [
"$$NOW",
5184000000 // 60 days
]
}
]
}
]
},
"then": 1,
"else": 0
}
}
}
}
},
"last_60_days": {
$sum: {
$map: {
"input": "$dates",
"as": "d",
"in": {
$cond: {
"if": {
$lte: [
"$$d",
{
$subtract: [
"$$NOW",
5184000000
]
}
]
},
"then": 1,
"else": 0
}
}
}
}
}
}
},
// find which document meet the conditions (1 for true, 0 for false)
{
$project: {
"meet_conditions": {
$cond: {
"if": {
$and: [
{
$gt: [
"$last_30_days", // at least one occurence
0
]
},
{
$eq: [
"$between_30_60", // no occurences
0
]
},
{
$gt: [
"last_60_days", // at least one occurence
0
]
}
]
},
"then": 1,
"else": 0
}
}
}
},
// count documents, summing the meet_conditions field (because they are 0 or 1)
{
$group: {
_id: null,
count: {
$sum: "$meet_conditions"
}
}
}
])
Result:
[
{
"_id": null,
"count": 1
}
]
So in the example above, there's just one documents that meet your conditions.
Obs.: To calculate the days you have to do (the bold numbers are the day you want):
1 * 1000 * 60 * 60 * 24 * 30 = 2592000000ms = 30 days
1 * 1000 * 60 * 60 * 24 * 60 = 5184000000ms = 60 days

Related

Create a view from mongo collection

I have a mongo collection with records like
and so on.
The sample record in JSON format
[{
empId:'123',
empName:'Emp1',
shiftHours:'Regular'
},
{
empId:'123',
empName:'Emp1',
shiftHours:'Morning'
}
]
Basically an employee can work in regular shift(9am-6 pm) or morning shift (6 am-3 pm) or night shift (9pm-6 am). The hours are just example here but the idea is that the working hours are categorized in 3 shifts. I want to create a view with flat structure per employee like this
and so on.
I am trying to understand what's the best way to create such a flat view (coming from SQL background, I think a procedure/function has to be written) but not sure what's the best way to do so using No-Sql (Mongo db).
Any suggestions?
$group by empId and conditionally $sum by shiftHours.
db.collection.aggregate([
{
$group: {
_id: "$empId",
empName: {
$first: "$empName"
},
Morning: {
$sum: {
"$cond": {
"if": {
$eq: [
"$shiftHours",
"Morning"
]
},
"then": 1,
"else": 0
}
}
},
Regular: {
$sum: {
"$cond": {
"if": {
$eq: [
"$shiftHours",
"Regular"
]
},
"then": 1,
"else": 0
}
}
},
Evening: {
$sum: {
"$cond": {
"if": {
$eq: [
"$shiftHours",
"Evening"
]
},
"then": 1,
"else": 0
}
}
}
}
},
{
$set: {
Morning: {
$cond: [
{
$gt: [
"$Morning",
0
]
},
"Y",
"N"
]
},
Regular: {
$cond: [
{
$gt: [
"$Regular",
0
]
},
"Y",
"N"
]
},
Evening: {
$cond: [
{
$gt: [
"$Evening",
0
]
},
"Y",
"N"
]
}
}
}
])
Mongo Playground

mongodb aggregation get max number of negative sequence in array

I need to get the max count of negative sequence from array via aggregation , example documents:
{
"id": 1,
x: [ 1,1,-1,-1,1,1,1,-1,-1,-1,-1]
},
{
"id": 2,
x: [ 1,-1,-1,1,1,1,-1 ]
}
expected result:
{"id": 1,x:4},
{"id": 2,x:2}
Please, advice?
You can use $reduce to iterate the array and $cond to apply your logic (consecutive negatives)
The carrier is in format
{
previous: // previous value to compare for continuity
acc: // number of consecutive negatives in the current sequence
max: // length of the longest sequence
}
$let is to memoise current accumulator to reuse in the max calculation. It's optional yet convenient:
db.collection.aggregate([
{
"$set": {
"x": {
"$reduce": {
"input": "$x",
"initialValue": {
previous: 0,
acc: 0,
max: 0
},
"in": {
$let: {
vars: {
result: {
"$cond": {
"if": {
"$and": [
{
"$lt": [
"$$this",
0
]
},
{
"$lt": [
"$$value.previous",
0
]
}
]
},
"then": {
"$add": [
"$$value.acc",
1
]
},
"else": {
"$cond": {
"if": {
"$lt": [
"$$this",
0
]
},
"then": 1,
"else": 0
}
}
}
}
},
in: {
previous: "$$this",
acc: "$$result",
max: {
"$cond": {
"if": {
$gt: [
"$$value.max",
"$$result"
]
},
"then": "$$value.max",
"else": "$$result"
}
}
}
}
}
}
}
}
},
{
"$set": {
x: "$x.max"
}
}
])
Try it on mongoplayground.net.
Here's another way to do it. The general idea is to $reduce the sequence to a string and then $split to make an array filled with strings of each run. Then map the array of strings to an array of string lengths and then take the max.
db.collection.aggregate({
"$project": {
"_id": 0,
"id": 1,
"x": {
"$max": {
"$map": {
"input": {
$split: [
{
"$reduce": {
"input": "$x",
"initialValue": "",
"in": {
$concat: [
"$$value",
{
"$cond": [
{
"$gt": [
"$$this",
0
]
},
"p",
"n"
]
}
]
}
}
},
"p"
]
},
"in": {
"$strLenBytes": "$$this"
}
}
}
}
}
})
Try it on mongoplayground.net.

Groupby Array elements and categorize them based on date in Mongodb Query

I have an array element in my DB and I want to group By and calculate the number of repetitions of different elements in this array based on different datetime. assume following collection:
{
_id: ObjectId(7df78ad8902c)
title: 'MongoDB Overview',
tags: ['SQL', 'database', 'NoSQL'],
created_at: 2021-10-03 10:05:51.755Z
},
{
_id: ObjectId(7df78ad8902d)
title: 'NoSQL Overview',
tags: ['mongodb', 'database', 'PHP'],
created_at: 2021-10-03 14:05:51.755Z
},
{
_id: ObjectId(7df78ad8902d)
title: 'Developing',
tags: ['java', 'btc/usdt', 'PHP'],
created_at: 2021-10-03 14:05:51.755Z
}
,
{
_id: ObjectId(7df78ad8902d)
title: 'databases for search',
tags: ['elasticsearch', 'database', 'PHP'],
created_at: 2021-10-03 12:05:51.755Z
}
I want to calculate the number of repetitions of different elements in tags field such as mongodb, database, noSQL based on datetime (for example hot hashtags in last hour, today or this month) in this collection. How can I solve this problem in mongo?
expected answer like .
1 - hot hashtags in last hour ['a' , 'b' , 'c']
2 - hot hastags in last 5 hours : ...
3 - today : ...
4 - this month : ...
Query
not calendar based, based on difference on milliseconds
keep only last 30 days data
facet and 4 group by
its exactly the same code 4x
The only difference is the multiply
last 30days : (now_date-created_at) <= (* 30 24 60 60 1000)
last 24h : (now_date-created_at) <= (* 24 60 60 1000)
last 120 hours(5days) : (now_date-created_at) <= (* 5 60 60 1000)
last 60 min : (now_date-created_at) <= (* 60 60 1000)
*subtraction works on dates also, and returns milliseconds
filter the date depending on what we want its 4 different filters
unwind
group by tag and count occurences
sort by count
limit 2 to keep like the 2 top hotttest tags, you can change it to any value, like limit 1 to keep only the hottest tag
*if you want calendar based like 3 October(3 days data only), query must be changed, the same is for day(query is for 24 hours) etc (in those cases we should use $hour $month etc)
Its not big change in query.
//same month
{"$eq" : [ {"$month" : "$$NOW"}, {"$month" : "$created_at"} ]
//same day(assuming same month from previous filter)
{"$eq" : [ {"$dayOfMonth" : "$$NOW"}, {"$dayOfMonth" : "$created_at"} ]
//same hour
*we could also use the new $dateTrunc to check for same month etc.
Test code here
(Query is big but its the same thing 4x)
db.collection.aggregate([
{
"$set": {
"created_at": {
"$dateFromString": {
"dateString": "$created_at"
}
}
}
},
{
"$unwind": {
"path": "$tags"
}
},
{
"$match": {
"$expr": {
"$lte": [
{
"$subtract": [
"$$NOW",
"$created_at"
]
},
{
"$multiply": [
30,
24,
60,
60,
1000
]
}
]
}
}
},
{
"$facet": {
"month-tag": [
{
"$match": {
"$expr": {
"$lte": [
{
"$subtract": [
"$$NOW",
"$created_at"
]
},
{
"$multiply": [
30,
24,
60,
60,
1000
]
}
]
}
}
},
{
"$group": {
"_id": "$tags",
"count": {
"$sum": 1
}
}
},
{
"$sort": {
"count": -1
}
},
{
"$limit": 2
},
{
"$project": {
"_id": 0,
"tag": "$_id"
}
}
],
"day-tag": [
{
"$match": {
"$expr": {
"$lte": [
{
"$subtract": [
"$$NOW",
"$created_at"
]
},
{
"$multiply": [
24,
60,
60,
1000
]
}
]
}
}
},
{
"$group": {
"_id": "$tags",
"count": {
"$sum": 1
}
}
},
{
"$sort": {
"count": -1
}
},
{
"$limit": 2
},
{
"$project": {
"_id": 0,
"tag": "$_id"
}
}
],
"5hour-tag": [
{
"$match": {
"$expr": {
"$lte": [
{
"$subtract": [
"$$NOW",
"$created_at"
]
},
{
"$multiply": [
5,
60,
60,
1000
]
}
]
}
}
},
{
"$group": {
"_id": "$tags",
"count": {
"$sum": 1
}
}
},
{
"$sort": {
"count": -1
}
},
{
"$limit": 2
},
{
"$project": {
"_id": 0,
"tag": "$_id"
}
}
],
"hour-tag": [
{
"$match": {
"$expr": {
"$lte": [
{
"$subtract": [
"$$NOW",
"$created_at"
]
},
{
"$multiply": [
60,
60,
1000
]
}
]
}
}
},
{
"$group": {
"_id": "$tags",
"count": {
"$sum": 1
}
}
},
{
"$sort": {
"count": -1
}
},
{
"$limit": 2
},
{
"$project": {
"_id": 0,
"tag": "$_id"
}
}
]
}
}
])

want to convert "00:10:00" to a single integer in mongodb

There are many documents in the collection which contains this field timeTaken: "00:10:00",
I want to sum up from all the documents and have to give a single integer in mongodb robo3T.
That is for the following documents:
[
{ timeTaken: "00:10:00" },
{ timeTaken: "01:10:00" },
{ timeTaken: "02:20:50" }
]
I want the result to be:
{ timeTaken: "03:40:50" }
Our strategy will be to split the string into minutes, seconds and hours, convert them to numbers, sum them up and then reconstruct the structure.
For this you will need access to operators like $toString and $toInt which means you can only do this for version 4.0+, for older Mongo versions you will have to read the documents and do this in code.
I've split the following query into multiple stages so it's clearer what I'm doing but this could be re-written into just 2 stages, the $group stage and a final $project stage to restructure the data.
db.collection.aggregate([
{
"$addFields": {
dataParts: {
$map: {
input: {
$split: [
"$data",
":"
]
},
as: "num",
in: {
"$toInt": "$$num"
}
}
},
}
},
{
$group: {
_id: null,
seconds: {
$sum: {
"$arrayElemAt": [
"$dataParts",
2
]
}
},
minutes: {
$sum: {
"$arrayElemAt": [
"$dataParts",
1
]
}
},
hours: {
$sum: {
"$arrayElemAt": [
"$dataParts",
0
]
}
},
}
},
{
"$addFields": {
finalSeconds: {
$mod: [
"$seconds",
60
]
},
}
},
{
$addFields: {
minutes: {
$sum: [
"$minutes",
{
"$divide": [
{
"$subtract": [
"$seconds",
"$finalSeconds"
]
},
60
]
}
]
},
}
},
{
$addFields: {
finalMinutes: {
$mod: [
"$minutes",
60
]
},
finalHours: {
$sum: [
"$hours",
{
$mod: [
{
$max: [
{
"$subtract": [
"$minutes",
60
]
},
0
]
},
60
]
}
]
}
}
},
{
$project: {
final: {
$concat: [
{
"$toString": "$finalHours"
},
":",
{
"$toString": "$finalMinutes"
},
":",
{
"$toString": "$finalSeconds"
},
]
}
}
}
])
Mongo Playground

How to write sum(if a>=10 and a<=100,a,0) like SQL in mongodb

I want to make the same SQL statement as specified select sum(if a >= 10 and a <= 100, a, 0) from table in MongoDb.
Therefore, I write:
{
"$project": {
"sumqty": {
"a1": {
"$cond": {
"if": {
"$and": [{"$gte":["$a",10]},
{"$lte":["$a",100]}]
},
"then": "$a",
"else": 0
}
}
}
}
}
Just use $cond and $group
db.table.aggregate([{
$group: {
_id: null,
totalSum: {
$sum: {
$cond: [{ $and: [{ $gte: ["$a", 10] }, { $lte: ["$a", 100] }] },
"$a",
0
]
}
}
}
}])