Mongodb group geolocation if within X meters - mongodb

I am trying to group points that lie within X meters of each other (say, 5 meters).
Can MongoDB do this?
My current query is naive; it only groups points whose coordinates are exactly equal:
// Counts documents per exact coordinate pair for the year 2022.
// Stage 1: keep `location` and split `updatedAt` into day, month and year fields.
[{$project: {
'location': 1,
'day': {
'$dayOfMonth': '$updatedAt'
},
'month': {
'$month': '$updatedAt'
},
'year': {
'$year': '$updatedAt'
}
// Stage 2: keep only documents whose computed year is 2022.
}}, {$match: {
'year': 2022
// Stage 3: group documents whose `location.coordinates` values are identical and count them.
}}, {$group: {
'_id': '$location.coordinates',
'count': {
'$sum': 1
}
// Stage 4: drop `_id` and expose the grouped coordinates with each element converted to a string.
}}, {$project: {
'_id': 0,
'count': 1,
'location.coordinates': {
'$map': {
'input': '$_id',
'in': {
'$toString': '$$this'
}
}
}
// Stage 5: largest groups first.
}}, {$sort: {
count: -1
}}]

Related

Fill data with NULL value if it is not present in the timeperiod using mongodb aggregation pipeline

I have to write an aggregation pipeline to which I will pass:
Timestamps of the start date and end date for a day
I have to divide the data into 30-minute buckets and find the data that falls into each bucket, like:
2023-01-16T00:30:00.000+00:00 , 2023-01-16T01:00:00.000+00:00, 2023-01-16T01:30:00.000+00:00 and so on.
If no data is present in a particular bucket, fill the values of that bucket with zero but still return its timestamp, like:
2023-01-16T01:00:00.000+00:00 there is no data, so give {timestamp:2023-01-16T01:00:00.000+00:00,a:0,b:0,c:0}
I have done the following:
// Sums `a` and `b` per 30-minute bucket for a single day and sorts the buckets chronologically.
[{
  // Restrict to one day.
  // NOTE: documents stamped at or after 23:59:59.000 are excluded by this upper bound.
  $match: {
    $and: [
      {
        timestamp: {
          $gte: ISODate('2023-01-16T00:00:00.000Z'),
          $lt: ISODate('2023-01-16T23:59:59.000Z')
        }
      }
    ]
  }
}, {
  $group: {
    // Bucket key: the timestamp rounded down to a multiple of 1800000 ms (30 minutes).
    _id: {
      $toDate: {
        $subtract: [
          { $toLong: '$timestamp' },
          { $mod: [{ $toLong: '$timestamp' }, 1800000] }
        ]
      }
    },
    in: { $sum: '$a' },
    out: { $sum: '$b' },
    Count: { $sum: 1 }
  }
}, {
  // The original snippet never closed this stage, which made the whole array a syntax error.
  $addFields: {
    totalIn: { $add: ['$in', '$out'] }
  }
}, {
  $sort: { _id: 1 }
}]
Result is:
[{
"_id": {
"2023-01-16T12:00:00.000+00:00"
}
},
"totalIn": 397,
"count":22
},
{
"_id": {
"2023-01-16T01:30:00.000+00:00"
}
},
"totalIn": 222,
"count":2
}
...]
expected result:
[{
"_id": {
"2023-01-16T12:00:00.000+00:00"
}
},
"totalIn": 397,
"count":22
},
{
"_id": {
"2023-01-16T12:30:00.000+00:00"
}
},
"totalIn": 0,
"count":0
},
{
"_id": {
"2023-01-16T01:00:00.000+00:00"
}
},
"totalIn": 0,
"count":0
},
{
"_id": {
"2023-01-16T12:00:00.000+00:00"
}
},
"totalIn": 222,
"count":2
}
...]
One option is to use $range with $dateAdd:
// Aggregates documents into 30-minute bins between startDate and endDate and
// emits one result per bin, with zeros for bins that contain no documents.
db.collection.aggregate([
// Keep documents with startDate <= timestamp < endDate.
{$match: {timestamp: {
$gte: startDate,
$lt: endDate
}}},
// Truncate each timestamp to its 30-minute bin; sum `a`, sum `b` and count documents per bin.
{$group: {
_id: {$dateTrunc: {date: "$timestamp", unit: "minute", binSize: 30}},
in: {$sum: "$a"},
out: {$sum: "$b"},
count: {$sum: 1}
}},
// Collapse all populated bins into a single document holding a `data` array.
{$group: {
_id: 0,
data: {$push: {
timestamp: "$_id",
totalIn: {$add: ["$in", "$out"]},
count: "$count"
}}
}},
// Build `bins`: one timestamp every 30 minutes starting at startDate.
// The number of bins is 2 x the hour difference between startDate and endDate.
{$project: {
_id: 0, data: 1,
bins: {$map: {
input: {$range: [
0,
{$multiply: [
{$dateDiff: {
startDate: startDate,
endDate: endDate,
unit: "hour"
}},
2
]}
]},
in: {$dateAdd: {
startDate: startDate,
unit: "minute",
amount: {$multiply: ["$$this", 30]}
}}
}}
}},
// One document per generated bin.
{$unwind: "$bins"},
// Keep only the `data` entry whose timestamp equals this bin (empty array if none).
{$set: {data: {$filter: {
input: "$data",
cond: {$eq: ["$bins", "$$this.timestamp"]}
}}}},
// Emit the bin as `_id`; fall back to 0 when the bin had no matching data entry.
{$project: {
_id: "$bins",
count: {$ifNull: [{$first: "$data.count"}, 0]},
totalIn: {$ifNull: [{$first: "$data.totalIn"}, 0]}
}}
])
See how it works on the playground example

MongoDB - get datewise/hourly aggregate count of column

I have a set of documents in my MongoDB collection. I am looking to get a date-wise aggregate count of documents if the date range is more than a day, and an hourly aggregate count for the same column if the date query is for a single day. The data may have documents with the same conversationId, hence it is necessary to group by conversationId as well. Below is a sample of the data for reference:
[
{
"_id":"c438a671-2391-4b85-815c-ecfcb3d2bb54",
"status":"INTERNAL_UPDATE",
"conversationId":"ac44781d-caab-4410-a708-9d6db8480fc3",
"messageIds":[],
"messageId":"4dc02026-ac06-4eb1-aa59-e385fcce4a36",
"responseId":"0c00c83d-61c5-4937-846c-2e6a46aae857",
"conversation":{},
"message":{},
"params":{},
"timestamp":"2021-05-04T11:40:06.552Z",
"source":{}
},
{
"_id":"98370ddf-9ff8-4347-bab7-1f7777ab9e9d",
"status":"NEW",
"conversationId":"b5dc39d2-56a1-4eb6-a728-cdbe33dca580",
"messageIds":[],
"messageId":"ba94b839-f795-44f2-aea0-173d26006f14",
"responseId":"a2b75364-447b-4345-8008-2beccd6cbb34",
"conversation":{},
"message":{},
"params":{},
"timestamp":"2021-05-05T11:40:30.897Z",
"source":{}
},
{
"_id":"db1eae2b-62d9-455c-ab46-dbfc5baf8b67",
"status":"INTERNAL_UPDATE",
"conversationId":"b5dc39d2-56a1-4eb6-a728-cdbe33dcb584",
"messageIds":[],
"messageId":"b83c743b-d36e-4fdd-9c03-21988af47263",
"responseId":"97198c09-0130-48dc-a225-6d0faeff3116",
"conversation":{},
"message":{},
"params":{},
"timestamp":"2021-05-05T11:40:31.418Z",
"source":{}
},
{
"_id":"12a21495-f857-4f18-a06e-f8ba0b951ade",
"status":"NEW",
"conversationId":"8e37c704-add8-4f9f-8e70-d630c24f653b",
"messageIds":[],
"messageId":"51a48362-545c-4f9f-930b-42e4841fc974",
"responseId":"4691468b-a43b-41d1-83df-1349fb554bfa",
"conversation":{},
"message":{},
"params":{},
"timestamp":"2021-05-06T11:43:58.174Z",
"source":{}
},
{
"_id":"4afaa735-4618-40cf-8b4f-00ee83b2c3c5",
"status":"INTERNAL_UPDATE",
"conversationId":"8e37c704-add8-4f9f-8e70-d630c24f653b",
"messageIds":[],
"messageId":"7c860126-bf1e-41b2-a7d3-6bcec3e8d5fb",
"responseId":"09cec9a1-2621-481d-b527-d98b007ef5be",
"conversation":{},
"message":{},
"params":{},
"timestamp":"2021-05-06T11:43:58.736Z",
"source":{}
},
{
"_id":"cf8deeca-2cfd-497e-b92b-03204c84217a",
"status":"NEW",
"conversationId":"3c6870b5-88d6-4e21-8629-28137dea3fee",
"messageIds":[],
"messageId":"da84e414-2269-4812-8ddd-e2cd6c9be4fd",
"responseId":"ae1014b2-0cc1-41f0-9990-cf724ed67ab7",
"conversation":{},
"message":{},
"params":{},
"timestamp":"2021-05-06T13:37:55.060Z",
"source":{}
}
]
Presently I am able to group by conversationId, but I am unable to get the data aggregated date-wise, or on an hourly basis if the date range is a single date.
Below is the query for the same:
// Counts the distinct conversationIds among documents in the date range
// whose `source.author` matches "user" (case-insensitive).
db.documentName.aggregate([
  {
    '$match': {
      '$and': [
        {
          timestamp: {
            '$gte': ISODate('2021-05-01T00:00:00.000Z'),
            '$lte': ISODate('2021-05-10T23:59:59.999Z')
          }
        },
        { 'source.author': { '$regex': 'user', '$options': 'i' } },
        {},
        {}
      ]
    }
  },
  // One output document per distinct conversationId.
  // The original snippet was missing the closing brace of this stage, which is a syntax error.
  { '$group': { _id: { 'conversationId': '$conversationId' } } },
  // Collapse the groups into a single { document_count: N } result.
  { '$count': 'document_count' }
])
I have tried adding something like $hour: '$timestamp', comma-separated, beside conversationId in $group, but it is of no use and gives an error.
The desired result I am trying to get for above data is, something like this
[{"date": "2021-05-04", "doc_count": 1},
{"date": "2021-05-05", "doc_count": 2},
{"date": "2021-05-06", "doc_count": 2}]
For 2021-05-05 there are 2 docs with different conversationIds, and for 2021-05-06 there are 3 docs in total but 2 of them have the same conversationId, hence the aggregate count for 2021-05-06 is also 2. Hope this clarifies my question.
The question is not entirely clear to me, but it sounds like you want something like this:
The groupId is a field to rebuild the date including the hour, or not, according to your condition:
EDIT:
// Counts distinct conversations per day, or per hour when the queried range is shorter than one day.
db.collection.aggregate([
// Keep documents inside the queried date range.
{$match: {
timestamp: {
$gte: ISODate("2021-05-01T00:00:00.000Z"),
$lte: ISODate("2021-05-07T23:59:59.999Z")
}
}
},
// Build `groupId`: the document's date rebuilt from year/month/day, plus an hour component.
{$project: {
conversationId: 1,
groupId: {
$dateFromParts: {
year: {$year: "$timestamp"},
month: {$month: "$timestamp"},
day: {$dayOfMonth: "$timestamp"},
// Hour is 0 (daily grouping) when the queried range spans at least one day,
// otherwise the document's own hour (hourly grouping).
// The two dates here must match the range used in $match.
hour: {$cond: [
{$gte: [
{$dateDiff: {
startDate: ISODate("2021-05-01T00:00:00.000Z"),
endDate: ISODate("2021-05-07T23:59:59.999Z"),
unit: "day"}}, 1]},
0,
{$hour: "$timestamp"}]}
}
}
}
},
// De-duplicate: one document per (conversationId, groupId) pair.
{$group: {_id: {conversationId: "$conversationId", groupId: "$groupId"}}},
// Count the distinct conversations in each groupId.
{$group: {_id: "$_id.groupId", doc_count: {$sum: 1}}},
// Expose the groupId as a string `date` field and drop `_id`.
{$project: {date: {$toString: "$_id"}, doc_count: 1, _id: 0}}
])
See how it works on the playground example
As suggested by #nimrodserok, for mongo version 4.2.9 the query would be
db.collection.aggregate([
{
$match: {
timestamp: {
$gte: ISODate("2021-05-01T00:00:00.000Z"),
$lte: ISODate("2021-05-07T23:59:59.999Z")
}
}
},
{
$project: {
conversationId: 1,
groupId: {
$dateFromParts: {
year: {
$year: "$timestamp"
},
month: {
$month: "$timestamp"
},
day: {
$dayOfMonth: "$timestamp"
},
hour: {
$cond: [
{
$gte: [
{
$subtract: [
{
$toLong: ISODate("2021-05-07T23:59:59.999Z")
},
{
$toLong: ISODate("2021-05-01T00:00:00.000Z")
}
]
},
86400000
]
},
0,
{
$hour: "$timestamp"
}
]
}
}
}
}
},
{
$group: {
_id: {
conversationId: "$conversationId",
groupId: "$groupId"
}
}
},
{
$group: {
_id: "$_id.groupId",
doc_count: {
$sum: 1
}
}
},
{
$project: {
date: {
$toString: "$_id"
},
doc_count: 1,
_id: 0
}
}
])

how do i count number of admins in mongoose?

When I perform the following aggregate, my expectation is to get the role counts for a dashboard, as shown in the expected output below, but what I am getting is 0 for the admin, extension and seeder values. What do you think is my mistake?
model
var Schema = mongoose.Schema(
{
username: String,
email: String,
password: String,
roles: [
{
type: mongoose.Schema.Types.ObjectId,
ref: "Role"
}
],
},
{ timestamps: true }
);
This is the aggregate i am working on...
controller
exports.userRoleCount = (rep,res,next) =>{
User.aggregate([
{$project: {
admin: {$cond: [{$eq: ["$roles", "['admin']"]}, 1, 0]},
seeder: {$cond: [{$eq: ["$roles", "seeder"]}, 1, 0]},
extension: {$cond: [{$eq: ["$roles", "extension"]}, 1, 0]},
}},
{$group: { _id: null, admin: {$sum: "admin"},
seeder: {$sum: "$seeder"},
extension: {$sum: "$extension"},
total: {$sum: 1},
}},
], (error,data)=>{
if (error){
return next(error);
} else {
res.json(data);
}
}
)
This is the output i get
output
{
"_id": null,
"admin": 0,
"seeder": 0,
"extension": 0,
"total": 9
}
My expectation was this.....
{
"_id": null,
"admin": 3,
"seeder": 4,
"extension": 2,
"total": 9
}
This is my roles array...
db.ROLES = ["user", "admin", "extension", "seeder"];
Hi,
{$unwind : "$roles"},
{$group: { _id: null,
admin: {$sum: {$eq : ["roles": "admin"]}},
seeder: {$sum: {$eq : ["roles": "seeder"]}},
total: {$sum: 1},
}},
I have changed the code fragment as follow and it worked!!
// Express-style handler: responds with the number of users per role reference.
// On aggregation failure the error is forwarded to `next`; otherwise the result is sent as JSON.
// Note: the first parameter is named `rep` and is not used in the body.
exports.userRoleCount = (rep,res,next) =>{
User.aggregate([
// Keep only the fields needed for counting.
{
"$project": {
"_id": 1,
"roles": 1
}
},
// One document per (user, role) pair.
{
"$unwind": "$roles"
},
// Count documents per distinct value stored in `roles`.
{
"$group": {
"_id": '$roles',
"count": {"$sum": 1}
}
},
// Order by role value descending and return at most 5 groups.
{"$sort": {"_id": -1}},
{"$limit": 5}
], (error,data)=>{
if (error){
return next(error);
} else {
res.json(data);
// console.log(data[1].count);
}
}
)
}
And the output looks as follows
{
{
_id:1 // this id is for role admin
count: 2
},
{
_id:2 // this id is for role extension
count: 5
}
}

Sort array of objects, using values inside each object (aggregation framework)

Suppose I have these documents:
[
{
'_id': 1,
'roles': [
{
'k': 'free',
'v': 1
},
{
'k': 'pro',
'v': 5
},
{
'k': 'free',
'v': 2
}
]
},
{
'_id': 2,
'roles': [
{
'k': 'pro',
'v': 1
},
{
'k': 'free',
'v': 3
},
{
'k': 'free',
'v': 2
}
]
}
]
So for every _id, we have an array of documents called roles.
I need to sort inside the array roles, using the v field.
Expected output:
[
{
'_id': 1,
'roles': [
{
'k': 'free',
'v': 1
},
{
'k': 'free',
'v': 2
},
{
'k': 'pro',
'v': 5
}
]
},
{
'_id': 2,
'roles': [
{
'k': 'pro',
'v': 1
},
{
'k': 'free',
'v': 2
},
{
'k': 'free',
'v': 3
}
]
}
]
So I tried to use $sort:
{
'$sort': {
'roles.v': 1
}
}
But it does not sort inside the array.
You need to $unwind and $group to reconstruct.
([
{ $unwind: "$roles" },
{ $sort: { "roles.v": 1 }},
{ $group: {
_id: "$_id",
roles: { $push: "$roles" }
}}
])
In order to sort by _id and if $project is needed, this will work as well.
db.getCollection("collectionName").aggregate([
{ $unwind: "$roles" },
{ $sort: { "roles.v": 1 }},
{ $group: {
_id: "$_id",
roles: { $push: "$roles" }
}},
{$project: {
_id: "$_id",
roles: "$roles"
}},
{$sort: {"_id": 1}}
])
You can use the simple MongoDB operator $sortArray
The code is this:
// Sorts each document's `roles` array by the `v` field, ascending, in place.
db.collection.aggregate([
  {
    // $set overwrites `roles` and keeps `_id`, matching the expected output;
    // the previous $project dropped `_id` and renamed the array to `result`.
    // $sortArray requires MongoDB 5.2 or newer.
    $set: {
      roles: {
        $sortArray: {
          input: "$roles",
          sortBy: { v: 1 }
        }
      }
    }
  }
])

Merging multiple aggregation queries to one with MongoDB

I'm using these three queries to build a Python dataframe with the columns: 'Date', '% part of business 2', '% part of business 3' (for each day, the percentage of gain coming from business 2 and 3).
query_business2 = collection.aggregate( [
{
'$match': {'Business': 2}
},
{
'$group': {
'_id': '$Date',
'stab2': {'$sum': '$Money'}
}
},
{
'$sort': {'_id': 1}
}
])
query_business3 = collection.aggregate([
{
'$match': {'Business':3}
},
{
'$group': {
'_id': '$Date',
'stab3': {'$sum': '$Money'}
}
},
{
'$sort': {'_id': 1}
}
])
query_total = collection.aggregate([
{
'$group': {
'_id': '$Date',
'total': {'$sum': '$Money'}
}
},
{
'$sort': {'_id': 1}
}
])
To make this faster, I would like to merge these three queries into one. I tried using '$or' but it didn't work (unhashable dict error).
Is there a better way to do that? It might be possible to build the dataframe format directly, without using pandas after these queries, and to calculate the percentage of each business relative to the total money earned directly. Thank you for your help.
Thanks to prasad_ the answer is :
# Single pipeline replacing the three separate queries: per Date, the money earned by
# business 2, by business 3 and overall, plus each business's share of the total in percent.
query_business = collection.aggregate([
# Group by Date; the $cond sums add Money only when Business matches, otherwise 0.
{
'$group':{
'_id': '$Date',
'total_2': {'$sum' : {'$cond': [{'$eq': ['$Business', 2]}, '$Money', 0]}},
'total_3': {'$sum' : {'$cond': [{'$eq': ['$Business', 3]}, '$Money', 0]}},
'total': {'$sum': '$Money'},
}
},
# Keep only dates where all three totals are positive; this also keeps the divisor
# used in the next stage non-zero.
{
'$match': {'$and': [{ 'total_2': {'$gt': 0}}, {'total': {'$gt': 0}},{'total_3':{'$gt':0}}]}
},
# part_N = total_N / total * 100.
{
'$addFields':{
'part_2': { "$multiply": [ { "$divide": ["$total_2","$total"] }, 100 ] },
'part_3': { "$multiply": [{'$divide': ['$total_3','$total']}, 100]}
}
}
])