Mongo DB aggregation query to get an attribute based on another array of objects - mongodb

I am trying to
get all the ids for which the period.startDate > sysdate
get all the ids for which the period.endDate < sysdate
from the JSON.
[
{
"id": 1,
"period":
[
{
"startDate": "2020-05-05",
"endDate": "2020-05-06"
},
{
"startDate": "2020-06-05",
"endDate": "2020-06-06"
}
]
},
{
"id": 2,
"period":
[
{
"startDate": "2024-07-05",
"endDate": "2024-07-06"
},
{
"startDate": "2024-08-05",
"endDate": "2024-08-06"
}
]
}
]
I have tried to go far as below aggregation:
[{
$project: {
_id: 0,
sId: '$id',
period: 1
} }, {
$unwind: {
path: '$period',
includeArrayIndex: 'index'
} }, {
$group: {
_id: '$sId',
minDate: {
$min: '$periods.startDate'
}
} }, {
$project: {
storeId: '$_id',
_id: 0,
minDated: {
$dateFromString: {
dateString: '$minDate'
}
},
today: ISODate('2022-08-03T11:37:03.954Z')
} }]

One option is using $reduce and $group:
db.collection.aggregate([
{$project: {
_id: 0,
id: 1,
minDate: {
$dateFromString: {
dateString: {
$reduce: {
input: "$period",
initialValue: {$first: "$period.startDate"},
in: {$min: ["$$value", "$$this.startDate"]}
}
}
}
},
maxDate: {
$dateFromString: {
dateString: {
$reduce: {
input: "$period",
initialValue: {$first: "$period.endDate"},
in: {$max: ["$$value", "$$this.startDate"]}
}
}
}
}
}
},
{$group: {
_id: 0,
startDateLargerIds: {
$push: {
$cond: [{$gt: ["$minDate", ISODate("2022-08-03T11:37:03.954Z")]},
"$id", "$$REMOVE"]}
},
endDateSmallerIds: {
$push: {
$cond: [{$lt: ["$maxDate", ISODate("2022-08-03T11:37:03.954Z")]},
"$id", "$$REMOVE"]}
}
}
},
{$unset: "_id"}
])
See how it works on the playground example

Related

Fill data with NULL value if it is not present in the timeperiod using mongodb aggregation pipeline

I have to write an aggreagtion pipeline in which I will pass:
Timestamps of start date and end data for a day
I have to divide the data into 30min buckets and find data in between that buckets like:
2023-01-16T00:30:00.000+00:00 , 2023-01-16T01:00:00.000+00:00, 2023-01-16T01:30:00.000+00:00 and so on.
If data is not present in any particular bucket fill the values of that bucketa with zero but give the timestamp like:
2023-01-16T01:00:00.000+00:00 ther is no data give {timestamp:2023-01-16T01:00:00.000+00:00,a:0,b:0,c:0}
I have done the following:
[{
$match: {
$and: [
{
timestamp: {
$gte: ISODate('2023-01-16T00:00:00.000Z'),
$lt: ISODate('2023-01-16T23:59:59.000Z')
}
}
]
}
}, {
$group: {
_id: {
$toDate: {
$subtract: [
{
$toLong: '$timestamp'
},
{
$mod: [
{
$toLong: '$timestamp'
},
1800000
]
}
]
}
},
in: {
$sum: '$a'
},
out: {
$sum: '$b'
},
Count: {
$sum: 1
}
}
}, {
$addFields: {
totalIn: {
$add: [
'$in',
'$out'
]
},{
$sort: {
_id: 1
}
}]
Result is:
[{
"_id": {
"2023-01-16T12:00:00.000+00:00"
}
},
"totalIn": 397,
"count":22
},
{
"_id": {
"2023-01-16T01:30:00.000+00:00"
}
},
"totalIn": 222,
"count":2
}
...]
expected result:
[{
"_id": {
"2023-01-16T12:00:00.000+00:00"
}
},
"totalIn": 397,
"count":22
},
{
"_id": {
"2023-01-16T12:30:00.000+00:00"
}
},
"totalIn": 0,
"count":0
},
{
"_id": {
"2023-01-16T01:00:00.000+00:00"
}
},
"totalIn": 0,
"count":0
},
{
"_id": {
"2023-01-16T12:00:00.000+00:00"
}
},
"totalIn": 222,
"count":2
}
...]
One option is to use $range with $dateAdd:
db.collection.aggregate([
{$match: {timestamp: {
$gte: startDate,
$lt: endDate
}}},
{$group: {
_id: {$dateTrunc: {date: "$timestamp", unit: "minute", binSize: 30}},
in: {$sum: "$a"},
out: {$sum: "$b"},
count: {$sum: 1}
}},
{$group: {
_id: 0,
data: {$push: {
timestamp: "$_id",
totalIn: {$add: ["$in", "$out"]},
count: "$count"
}}
}},
{$project: {
_id: 0, data: 1,
bins: {$map: {
input: {$range: [
0,
{$multiply: [
{$dateDiff: {
startDate: startDate,
endDate: endDate,
unit: "hour"
}},
2
]}
]},
in: {$dateAdd: {
startDate: startDate,
unit: "minute",
amount: {$multiply: ["$$this", 30]}
}}
}}
}},
{$unwind: "$bins"},
{$set: {data: {$filter: {
input: "$data",
cond: {$eq: ["$bins", "$$this.timestamp"]}
}}}},
{$project: {
_id: "$bins",
count: {$ifNull: [{$first: "$data.count"}, 0]},
totalIn: {$ifNull: [{$first: "$data.totalIn"}, 0]}
}}
])
See how it works on the playground example

How to use $match (multiple conditions) and $group in Mongodb

have list of records with the following fields - postBalance, agentId, createdAt, type. I want to filter by “type” and date. After this is done I want to get the $last postBalance for each agent based on the filter and sum up the postBalance. I have been struggling with this using this query
db.transaction.aggregate(
[{ $match: {
$and: [ {
createdAt: { $gte: ISODate('2022-09-15'), $lt:
('2022-09-16') } },
{ type: "CASH_OUT"}]}},
{
$group:
{
_id: {createdAt: {$last: "$createdAt"}},
totalAmount: { $sum: "$postBalance" },
}
}
]
)
An empty array is returned with this query and there are data in the collection.
Below are samples of the documents
{
"_id": {
"$oid": "6334cefd0048787d5535ff16"
},
"type": "CASH_OUT",
"postBalance": {
"$numberDecimal": "23287.625"
},
"createdAt": {
"$date": {
"$numberLong": "1664405245000"
}
},
}
{
"_id": {
"$oid": "6334d438c1ab8a577677cbf3"
},
"userID": {
"$oid": "62f27bc29f51747015fdb941"
},
"aggregatorID": "0000116",
"transactionFee": {
"$numberDecimal": "0.0"
},
"type": "AIRTIME_VTU",
"postBalance": {
"$numberDecimal": "2114.675"
},
"walletHistoryID": 613266,
"walletID": 1720,
"walletActionAt": {
"$date": {
"$numberLong": "1664406584000"
}
},
{
"type": "FUNDS_TRANSFER",
"postBalance": {
"$numberDecimal": "36566.39"
},
"createdAt": {
"$date": {
"$numberLong": "1664407090000"
}
}
}
This is the output I am expecting
{
"date" : 2022-10-09,
"CASHOUT ": 897663,088,
"FUNDS_TRANSFER": 8900877,
"AIRTIME_VTU": 8890000
}
How can my query be aggregated to get this? Thanks
It look like you want something like:
db.collection.aggregate([
{$match: {
createdAt: {
$gte: ISODate("2022-09-15T00:00:00.000Z"),
$lt: ISODate("2022-09-30T00:00:00.000Z")
}
}
},
{$group: {
_id: "$type",
createdAt: {$first: "$createdAt"},
totalAmount: {$sum: "$postBalance"}
}
},
{$group: {
_id: 0,
createdAt: {$first: "$createdAt"},
data: {$push: {k: "$_id", v: "$totalAmount"}}
}
},
{$project: {
data: {$arrayToObject: "$data"},
createdAt: 1,
_id: 0
}
},
{$set: {"data.date": "$createdAt"}},
{$replaceRoot: {newRoot: "$data"}}
])
See how it works on the playground example

MongoDB - get datewise/houlty aggregate count of column

I have set of documents in my mongoDB collection. I am looking to get datewise aggregate count of document if date range is more than a day and hourly aggregate count for same column if date query is for single day. The data may have documents with same conversationId, hence it is necessary to group with conversationId as well.Below is sample of data for reference
[
{
"_id":"c438a671-2391-4b85-815c-ecfcb3d2bb54",
"status":"INTERNAL_UPDATE",
"conversationId":"ac44781d-caab-4410-a708-9d6db8480fc3",
"messageIds":[],
"messageId":"4dc02026-ac06-4eb1-aa59-e385fcce4a36",
"responseId":"0c00c83d-61c5-4937-846c-2e6a46aae857",
"conversation":{},
"message":{},
"params":{},
"timestamp":"2021-05-04T11:40:06.552Z",
"source":{}
},
{
"_id":"98370ddf-9ff8-4347-bab7-1f7777ab9e9d",
"status":"NEW",
"conversationId":"b5dc39d2-56a1-4eb6-a728-cdbe33dca580",
"messageIds":[],
"messageId":"ba94b839-f795-44f2-aea0-173d26006f14",
"responseId":"a2b75364-447b-4345-8008-2beccd6cbb34",
"conversation":{},
"message":{},
"params":{},
"timestamp":"2021-05-05T11:40:30.897Z",
"source":{}
},
{
"_id":"db1eae2b-62d9-455c-ab46-dbfc5baf8b67",
"status":"INTERNAL_UPDATE",
"conversationId":"b5dc39d2-56a1-4eb6-a728-cdbe33dcb584",
"messageIds":[],
"messageId":"b83c743b-d36e-4fdd-9c03-21988af47263",
"responseId":"97198c09-0130-48dc-a225-6d0faeff3116",
"conversation":{},
"message":{},
"params":{},
"timestamp":"2021-05-05T11:40:31.418Z",
"source":{}
},
{
"_id":"12a21495-f857-4f18-a06e-f8ba0b951ade",
"status":"NEW",
"conversationId":"8e37c704-add8-4f9f-8e70-d630c24f653b",
"messageIds":[],
"messageId":"51a48362-545c-4f9f-930b-42e4841fc974",
"responseId":"4691468b-a43b-41d1-83df-1349fb554bfa",
"conversation":{},
"message":{},
"params":{},
"timestamp":"2021-05-06T11:43:58.174Z",
"source":{}
},
{
"_id":"4afaa735-4618-40cf-8b4f-00ee83b2c3c5",
"status":"INTERNAL_UPDATE",
"conversationId":"8e37c704-add8-4f9f-8e70-d630c24f653b",
"messageIds":[],
"messageId":"7c860126-bf1e-41b2-a7d3-6bcec3e8d5fb",
"responseId":"09cec9a1-2621-481d-b527-d98b007ef5be",
"conversation":{},
"message":{},
"params":{},
"timestamp":"2021-05-06T11:43:58.736Z",
"source":{}
},
{
"_id":"cf8deeca-2cfd-497e-b92b-03204c84217a",
"status":"NEW",
"conversationId":"3c6870b5-88d6-4e21-8629-28137dea3fee",
"messageIds":[],
"messageId":"da84e414-2269-4812-8ddd-e2cd6c9be4fd",
"responseId":"ae1014b2-0cc1-41f0-9990-cf724ed67ab7",
"conversation":{},
"message":{},
"params":{},
"timestamp":"2021-05-06T13:37:55.060Z",
"source":{}
}
]
Presently I am able to group by conversationId, but unable to get data aggregated datewise or on hourly basis if date range is on single date.
Below is the query for same
db.documentName.aggregate([
{
'$match': {
'$and': [
{
timestamp: {
'$gte': ISODate('2021-05-01T00:00:00.000Z'),
'$lte': ISODate('2021-05-10T23:59:59.999Z')
}
},
{ 'source.author': { '$regex': 'user', '$options': 'i' } },
{},
{}
]
}
},
{ '$group': {
_id: {'conversationId': '$conversationId'} },
{ '$count': 'document_count' }
])
I have tried adding something like, $hour: '$timestamp' with comma separation beside conversationId in $group, but its of no use and is giving error.
The desired result I am trying to get for above data is, something like this
[{"date": "2021-05-04", "doc_count": 1},
{"date": "2021-05-05", "doc_count": 2},
{"date": "2021-05-06", "doc_count": 2}]
As for 2021-05-05 there are 2 docs with different conversationId, and for 2021-05-06 there are 3 docs in total but 2 documents have same conversationId hence aggregate count for 2021-05-06 is also 2. Hope this clarifies my quesiton.
The question is not entirely clear to me, but it sounds like you want something like this:
The groupId is a field to rebuild the date including the hour, or not, according to your condition:
EDIT:
db.collection.aggregate([
{$match: {
timestamp: {
$gte: ISODate("2021-05-01T00:00:00.000Z"),
$lte: ISODate("2021-05-07T23:59:59.999Z")
}
}
},
{$project: {
conversationId: 1,
groupId: {
$dateFromParts: {
year: {$year: "$timestamp"},
month: {$month: "$timestamp"},
day: {$dayOfMonth: "$timestamp"},
hour: {$cond: [
{$gte: [
{$dateDiff: {
startDate: ISODate("2021-05-01T00:00:00.000Z"),
endDate: ISODate("2021-05-07T23:59:59.999Z"),
unit: "day"}}, 1]},
0,
{$hour: "$timestamp"}]}
}
}
}
},
{$group: {_id: {conversationId: "$conversationId", groupId: "$groupId"}}},
{$group: {_id: "$_id.groupId", doc_count: {$sum: 1}}},
{$project: {date: {$toString: "$_id"}, doc_count: 1, _id: 0}}
])
See how it works on the playground example
As suggested by #nimrodserok, for mongo version 4.2.9 the query would be
db.collection.aggregate([
{
$match: {
timestamp: {
$gte: ISODate("2021-05-01T00:00:00.000Z"),
$lte: ISODate("2021-05-07T23:59:59.999Z")
}
}
},
{
$project: {
conversationId: 1,
groupId: {
$dateFromParts: {
year: {
$year: "$timestamp"
},
month: {
$month: "$timestamp"
},
day: {
$dayOfMonth: "$timestamp"
},
hour: {
$cond: [
{
$gte: [
{
$subtract: [
{
$toLong: ISODate("2021-05-07T23:59:59.999Z")
},
{
$toLong: ISODate("2021-05-01T00:00:00.000Z")
}
]
},
86400000
]
},
0,
{
$hour: "$timestamp"
}
]
}
}
}
}
},
{
$group: {
_id: {
conversationId: "$conversationId",
groupId: "$groupId"
}
}
},
{
$group: {
_id: "$_id.groupId",
doc_count: {
$sum: 1
}
}
},
{
$project: {
date: {
$toString: "$_id"
},
doc_count: 1,
_id: 0
}
}
])

Mongodb aggregation group by inner array

I have aggregated my data to give this output.
[
{
"_id": {
"source": "source_1",
"medium": "medium_1",
"campaign": "campaign_1"
},
"visitors": [
{
"_id": "60073f564d6c915237dbe158",
"location": {
"city": "Miami",
"postal": "33177"
}
},
{
"_id": "60073f564d6c915237dbe158",
"location": {
"city": "Miami",
"postal": "33163"
}
}
]
},
{
"_id": {
"source": "source_2",
"medium": "medium_2",
"campaign": "campaign_2"
},
"visitors": [
{
"_id": "60073f564d6c915237dbe158",
"location": {
"city": "Miami",
"postal": "33177"
}
},
{
"_id": "60073f564d6c915237dbe158",
"location": {
"city": "Miami",
"postal": "33162"
}
}
]
}
]
I want to group inner visitors array and get this output.
[
{
"_id": {
"source": "source_1",
"medium": "medium_1",
"campaign": "campaign_1"
},
"visitors": [
{
"city": "Miami",
"postal": "33177",
"count": 2
},
{
"city": "Miami",
"postal": "33163",
"count": 5
}
]
},
{
"_id": {
"source": "source_2",
"medium": "medium_2",
"campaign": "campaign_2"
},
"visitors": [
{
"city": "Miami",
"postal": "33177",
"count": 1
},
{
"city": "Miami",
"postal": "33163",
"count": 3
}
]
}
]
aggregate pipeline executed on campaigns collection:
[{$match: {
website_id: 1,
$or: [
{
source:{
$regex:/goo/,
$options: 'i'
}
},
{
medium:{
$regex:/goo/,
$options: 'i'
}
},
{
campaign:{
$regex:/goo/,
$options: 'i'
}
}
]
}}, {$addFields: {
visitor_id: {
$toObjectId: "$visitor_id"
}
}}, {$lookup: {
from: 'visitors',
localField: 'visitor_id',
foreignField: '_id',
as: 'visitors'
}}, {$unwind: {
path: '$visitors'
}}, {$group: {
_id: {
source: '$source',
medium: '$medium',
campaign: '$campaign',
},
visitors:{
$push: '$visitors'
}
}}, {$unwind: {
path: '$visitors'
}}, {$group: {
_id: {
'city': '$visitors.location.city',
'postal': '$visitors.location.postal'
},
'count': {
'$sum': 1
}
}}, {$project: {
'_id': 0,
'city': '$_id.city',
'postal': '$_id.postal',
'count': '$count',
'total': {
'$sum': '$count'
}
}}, {$project: {
'city': '$city',
'postal': '$postal',
'count': '$count',
'total': {
'$sum': '$total'
}
}}]
So the idea is first group the visitors by their postal number along with the campaign details to get the count and then aggregate it by only campaign details to accumulate the visitors.
Try this query:
db.campaigns.aggregate([
{
$match: {
// Put your condtions here.
}
},
{
$project: {
source: 1,
medium: 1,
campaign: 1,
visitor_id: 1
}
},
{
$addFields: {
visitor_id: { $toObjectId: "$visitor_id" }
}
},
{
$lookup: {
from: "visitors",
let: { "visitor_id": "$visitor_id" },
pipeline: [
{
$match: {
$expr: { $eq: ["$_id", "$$visitor_id"] }
}
},
{
$project: {
location: {
city: 1,
postal: 1
}
}
}
],
as: "visitor"
}
},
{ $unwind: "$visitor" },
{
$group: {
_id: {
source: "$source",
medium: "$medium",
campaign: "$campaign",
postal: "$visitor.location.postal"
},
visitors: { $push: "$visitor" },
count: { $sum: 1 }
}
},
{
$group: {
_id: {
source: "$_id.source",
medium: "$_id.medium",
campaign: "$_id.campaign"
},
visitors: {
$push: {
city: { $arrayElemAt: ["$visitors.location.city", 0] },
postal: { $arrayElemAt: ["$visitors.location.postal", 0] },
count: "$count"
}
}
}
}
]);
You need to correct group stage,
$group by source, medium, campaign and postal, get first city and count total sum
$group by source, medium, campaign and construct visitors array with required fields
db.campaigns.aggregate([
{ $match: .. } //skipped
{ $addFields: .. }, //skipped
{ $lookup: .. }, //skipped
{ $unwind: .. }, //skipped
{
$group: {
_id: {
source: "$source",
medium: "$medium",
campaign: "$campaign",
postal: "$visitors.location.postal"
},
city: { $first: "$visitors.location.city" },
count: { $sum: 1 }
}
},
{
$group: {
_id: {
source: "$_id.source",
medium: "$_id.medium",
campaign: "$_id.campaign"
},
visitors: {
$push: {
city: "$city",
postal: "$_id.postal",
count: "$count"
}
}
}
}
])
Playground

Mongodb aggregate $group and count for date ranges

I have documents like these:
{
"_id" : ObjectId("5cc80389c723e046f504b5a9"),
"adddress" : "string",
"checkIn" : "2019-04-30T08:12:57.909Z"
},
{
"_id" : ObjectId("5cc995f5a6f3eb7c483b019f"),
"adddress" : "string",
"checkIn" : "2019-05-01T12:49:57.561Z"
}
I have tried aggrgation like this:
var start = new Date("2019-04-30T08:12:57.909Z");
var end = new Date("2019-05-01T12:49:57.561Z");
var pipeline = [
{
$match: {
checkIn: {
$gte: start,
$lte: end
}
}
},
{
$group: {
_id: {
year: {
$year: "$checkIn"
},
month: {
$month: "$checkIn"
},
day: {
$dayOfYear: "$checkIn"
}
},
count: {
$sum: 1
}
}
}];
db.collections.aggregate(pipeline).toArray()
Is it possible to count them by checkIn date and get result like this:
"_id": [{
"checkIn": "2019-03-15T00:00:00Z",
"count": 4
}, {
"checkIn": "2019-04-30T00:00:00Z",
"count": 1
}, {
"checkIn": "2019-05-10T00:00:00Z",
"count": 1
}],
The result is shown the total number of the day.
{$project: {
checkIn: { $dateToString: { format: '%Y-%m-%d', date: '$checkIn' } }
}},
{$group: {
_id: '$checkIn',
checkIn: {$first: '$checkIn'},
count: {$sum: 1}
}},
{$sort: {checkIn: 1}}
Try this: I have tested this query and its working.
db.sample.aggregate([{
$addFields: {
date: {
$dateFromString: {
dateString: "$checkIn"
}
}
}
},{
$match: {
date: {
$gte: start,
$lte: end
}
}
},
{
$addFields: {
dateString: {
$dateToString: {
format: "%Y-%m-%d",
date: "$date"
}
}
}
},
{
$group: {
_id: "$dateString",
count: {
$sum: 1
}
}
}
]);