How to perform count(column) in mongodb with aggregation? - mongodb

I am trying to do to the equivalent of the following query in mongodb:
select count(*), count(category), sum(price) from sales group by usergender
Here is what the documents in my collection look like:
{
"_id" : ObjectId("54da8b0aa7c80aed4a9f9f33"),
"userincome" : "$100,000 - $200,000",
"county" : "Los Angeles",
"userstate" : "California",
"usercity" : "Los Angeles",
"price" : 100,
"category" : "Swimwear",
"usergender" : "Male"
}
Here is my aggregation which returns count(*) and sum(price) but I am not sure how to add in count(category).
db['stream.sales'].aggregate([
{
$group:{
_id:"$usergender",
price:{
$sum:"$price"
},
_count:{
$sum:1
}
}
}
])
I know I can run a separate aggregation to get count(category) but I would like to do it in aggregation, because I don't want all my results filtered where category exists = true.
db['stream.sales'].aggregate([
{
$match:{
'category':{
"$exists":true
}
}
},
{
$group:{
_id:"$usergender",
count:{
$sum:1
}
}
}
]);
Edit:
Was able to find the solution with the help of wdberkleys response:
db['stream.sales'].aggregate([
{ "$group" : {
"_id" : "$usergender",
"count" : { "$sum" : 1 },
"price" : { "$sum" : "$price" },
"category" : { "$push" : "$category" }
} },
{ "$project" : {
"count" : 1,
"size" : 1,
"categories" : { "$size" : "$category" }
} }
])

Push the categories to a set during the $group, then $project the size of the resulting set of categories:
db.stream.sales.aggregate([
{ "$group" : {
"_id" : "$usergender",
"count" : { "$sum" : 1 },
"price" : { "$sum" : "$price" },
"categories" : { "$addToSet" : "$category" }
} },
{ "$project" : {
"count" : 1,
"size" : 1,
"categories" : { "$size" : "$category" }
} }
])

Related

Mongodb sort on a Text condition

I have collection like this.
[{
"_id" : ObjectId("62bae0858e4132ca723f00d4"),
"appliedDate" : ISODate("2022-06-28T00:00:00Z"),
"status" : "Approved"
},
{
"_id" : ObjectId("62bae0858e4132ca723f00d4"),
"appliedDate" : ISODate("2022-06-24T00:00:00Z"),
"status" : "Applied"
},
{
"_id" : ObjectId("62bae0858e4132ca723f00d4"),
"appliedDate" : ISODate("2022-06-25T00:00:00Z"),
"status" : "Applied"
},
{
"_id" : ObjectId("62bae0858e4132ca723f00d4"),
"appliedDate" : ISODate("2022-06-25T00:00:00Z"),
"status" : "Absent"
}]
I need to sort the status by Applied 1st and then by appliedDate
db.leaverequest.aggregate([
{ $match: { $text: { $search: "Applied" } } },
{ $sort: { score: { $meta: "textScore" } } }
]).pretty()
I tried the above aggregate query but its not working as expected
You can do like this
db.collection.aggregate([
{
"$sort": {
"status": 1, //sort by status
"appliedDate": 1 //if same, use appliedDate for collision resolution
}
}
])

Multiple condition on same column mongodb

{
"_id" : ObjectId("5ccfe06e2434de5c345d058e"),
"user_id" : 1,
"event" : "eventA",
"channel_id" : "1098",
"channel_node_id" : "2177",
"channel_name" : "New work",
"created_at" : NumberLong("1557127278000"),
"updated_at" : NumberLong("1557127278000"),
"user_raw_data" : {
"Name" : "akhilesh",
"Mobile" : "1234567890",
"Email" : "akhilesh#test.com"
},
{
"_id" : ObjectId("5ccfe06e2434de5c345d058e"),
"user_id" : 1
"event" : "eventB,
"channel_id" : "1098",
"channel_node_id" : "2177",
"channel_name" : "New work",
"created_at" : NumberLong("1557127278000"),
"updated_at" : NumberLong("1557127278000"),
"user_raw_data" : {
"Name" : "akhilesh",
"Mobile" : "1234567890",
"Email" : "akhilesh#test.com"
},
{
"_id" : ObjectId("5ccfe06e2434de5c345d058e"),
"user_id" : 1
"event" : "eventC,
"channel_id" : "1098",
"channel_node_id" : "2178",
"channel_name" : "New work",
"created_at" : NumberLong("1557127278000"),
"updated_at" : NumberLong("1557127278000"),
"user_raw_data" : {
"Name" : "akhilesh",
"Mobile" : "1234567890",
"Email" : "akhilesh#test.com"
}
,
{
"_id" : ObjectId("5ccfe06e2434de5c345d058e"),
"user_id" : 2
"event" : "eventA,
"channel_id" : "1098",
"channel_node_id" : "2178",
"channel_name" : "New work",
"created_at" : NumberLong("1557127278000"),
"updated_at" : NumberLong("1557127278000"),
"user_raw_data" : {
"Name" : "akhilesh",
"Mobile" : "1234567890",
"Email" : "akhilesh#test.com"
}
Now I want to get the count of events performed by the user so far. For eventA count will be 2, for eventB 1, and for eventC is 1. But this query will have multiple conditions so my condition is getting the count only if the user has performed (eventA or eventB) and eventC. So in that case from above doc user id 2 event will not be considered because they have not performed (eventA or eventB)
Also along with event match with mutiple and or condition I also want to apply filter on user_raw_data so my query should be like this
db.web_channel_events.aggregate([
{
$match: {
"channel_id": "1098",
"channel_node_id": "2177"
}
},
{
$group: {
"_id": {
"user_id": "$user_id",
"event": "$event"
},
"count": {
$sum: 1
}
}
},
{
$group: {
"_id": "$_id.user_id",
"event_details": {
$push: {
"k": "$_id.event",
"v": "$count"
}
}
}
},
{
$match: {
$and: [
{
$or: [
{
"event_details.k": "eventA",
"event_details.v": {
"$gte": 1
}
},
{
"event_details.k": "eventB",
"event_details.v": {
"$gte": 1
}
}
]
},
{
"event_details.k": "eventC",
"event_details.v": {
"$gte": 1
}
},
{
"user_raw_data.Name": "akhilesh"
}
]
}
},
{
"$unwind": "$event_details"
},
{
$group: {
"_id": "$event_details.k",
"count": {
$sum: "$event_details.v"
}
}
}
]).pretty();
The following query can get us the expected output:
db.collection.aggregate([
{
$group:{
"_id":{
"user_id":"$user_id",
"event":"$event"
},
"user_id":{
$first:"$user_id"
},
"event":{
$first:"$event"
},
"count":{
$sum:1
}
}
},
{
$group:{
"_id":"$user_id",
"user_id":{
$first:"$user_id"
},
"event_details":{
$push:{
"k":"$event",
"v":"$count"
}
}
}
},
{
$addFields:{
"event_details":{
$arrayToObject:"$event_details"
}
}
},
{
$match:{
$and:[
{
$or:[
{
"event_details.eventA":{
$gt:0
}
},
{
"event_details.eventB":{
$gt:0
}
}
]
},
{
"event_details.eventC":{
$gt:0
}
}
]
}
},
{
$group:{
"_id":null,
"eventA":{
$sum:"$event_details.eventA"
},
"eventB":{
$sum:"$event_details.eventB"
},
"eventC":{
$sum:"$event_details.eventC"
}
}
},
{
$project:{
"_id":0,
"event_details.eventA":"$eventA",
"event_details.eventB":"$eventB",
"event_details.eventC":"$eventC"
}
}
]).pretty()
Data set:
{
"_id" : ObjectId("5ccfe06e2434de5c345d0588"),
"event" : "eventA",
"user_id" : 1,
"created_at" : NumberLong("1557127278000"),
"updated_at" : NumberLong("1557127278000")
}
{
"_id" : ObjectId("5d8b132388edcf85b0aaa609"),
"event" : "eventA",
"user_id" : 1,
"created_at" : NumberLong("1557127278000"),
"updated_at" : NumberLong("1557127278000")
}
{
"_id" : ObjectId("5d8b132388edcf85b0aaa60a"),
"event" : "eventB",
"user_id" : 1,
"created_at" : NumberLong("1557127278000"),
"updated_at" : NumberLong("1557127278000")
}
{
"_id" : ObjectId("5d8b132388edcf85b0aaa60b"),
"event" : "eventC",
"user_id" : 1,
"created_at" : NumberLong("1557127278000"),
"updated_at" : NumberLong("1557127278000")
}
{
"_id" : ObjectId("5d8b132388edcf85b0aaa60c"),
"event" : "eventC",
"user_id" : 1,
"created_at" : NumberLong("1557127278000"),
"updated_at" : NumberLong("1557127278000")
}
{
"_id" : ObjectId("5d8b132388edcf85b0aaa60d"),
"event" : "eventC",
"user_id" : 2,
"created_at" : NumberLong("1557127278000"),
"updated_at" : NumberLong("1557127278000")
}
Output:
{ "event_details" : { "eventA" : 2, "eventB" : 1, "eventC" : 2 } }
Query analysis:
Grouping the data on the basis of user_id and event and calculating the count of that specific incident
Grouping only on the basis of user_id and pushing event and its
count into an array event_details as key-value pairs
Converting the event_details into an object
Applying the condition that the event count for ( A or B ) and C
should be greater than 0
Sum up individual event counts

MongoDB count distinct items

I have following query on a list with this fields : key,time,p,email
use app_db;
db.getCollection("app_log").aggregate(
[
{
"$match" : {
"key" : "login"
}
},
{
"$group" : {
"_id" : {
"$substr" : [
"$time",
0.0,
10.0
]
},
"total" : {
"$sum" : "$p"
},
"count" : {
"$sum" : 1.0
}
}
}
]
);
and the output is something like this :
{
"_id" : "2019-08-25",
"total" : NumberInt(623),
"count" : 400.0
}
{
"_id" : "2019-08-24",
"total" : NumberInt(2195),
"count" : 1963.0
}
{
"_id" : "2019-08-23",
"total" : NumberInt(1294),
"count" : 1706.0
}
{
"_id" : "2019-08-22",
"total" : NumberInt(53),
"count" : 1302.0
}
But I need the count to be distinctive on email field, which is count number of distinct email addresses who logged in per day and their p value is greater 0
You need $addToSet to get an array of unique email values per day and then you can use $size to count the number of items in that array:
db.getCollection("app_log").aggregate(
[
{
"$match" : {
"key" : "login"
}
},
{
"$group" : {
"_id" : {
"$substr" : [
"$time",
0.0,
10.0
]
},
"total" : {
"$sum" : "$p"
},
"emails" : {
"$addToSet": "$email"
}
}
},
{
$project: {
_id: 1,
total: 1,
countDistinct: { $size: "$emails" }
}
}
]
);

Mongodb find data and groupby for another column

{
"_id" : ObjectId("5763e4d6c0140edcb8731485"),
"_class" : "net.microservice.product.domain.Product",,
"createdAt" : ISODate("2016-06-17T11:53:58.228Z"),
"createdBy" : "user-0",
"modifiedAt" : ISODate("2016-06-21T06:21:47.524Z"),
"modifiedBy" : "user-0",
"merchant" : "a746f24safa5-e96f-4281-9759-a4a02b306d77",
"type" : DBRef("productTypes", ObjectId("575fd99236623f70c959247f")),
"fields" : {
"Image4" : {
"value" : "http://i.hizliresim.com/ZdELXa.jpg",
"detail" : {
"revisedBy" : "CTA",
"revisionDate" : ISODate("2016-06-21T06:21:47.204Z")
}
},
"Image3" : {
"value" : "http://i.hizliresim.com/l1WkqX.jpg",
"detail" : {
"revisedBy" : "CTA",
"revisionDate" : ISODate("2016-06-21T06:21:47.204Z")
}
},
"Image2" : {
"value" : "http://i.hizliresim.com/VYMl9n.jpg",
"detail" : {
"revisedBy" : "CTA",
"revisionDate" : ISODate("2016-06-21T06:21:47.204Z")
}
},
"Kur" : {
"value" : "TL",
"detail" : {
"revisedBy" : "CTA",
"revisionDate" : ISODate("2016-06-21T06:21:47.204Z")
}
},
"Image1" : {
"value" : "http://i.hizliresim.com/nrWAQ0.jpg",
"detail" : {
"revisedBy" : "CTA",
"revisionDate" : ISODate("2016-06-21T06:21:47.204Z")
}
},
"uploadDate" : ISODate("2016-06-17T11:53:00Z"),
"tasks" : [ ]
}
this is sample of database. I want to get data in which:
- modifiedAt is before "modifiedAt" : ISODate("2016-07-21T06:21:47.524Z"),
so i do this and this works:
db.products.find({
'modifiedAt':
{$lte: ISODate("2016-10-18T13:05:18.961Z"
)} }).
count()
14999
But i need to find for each merchant. Beause 14999 result is not true because a merchant have lots of product so 14999 includes multiple products.
I need to group by merchant and distinct. I couldnot do it.
i do this but
db.products.
aggregate([ {
$group: {
_id: '$merchant', } }, {
$match: {
modifiedAt:
{$lte: ISODate("2016-06-18T13:05:18.961Z")} }} ])
brings nothing and no error.
you can try something like this. This gives you the number of products by merchant.
db.products.aggregate([
{$match: {modifiedAt:{$lte: ISODate("2016-06-21T06:21:47.524Z")}}},
{$group: { _id: "$merchant",count: { $sum: 1 }}}
])
Output:
{ "_id" : "a89846f24safa5-e96f-4281-9759-a4a02b306d77", "count" : 1 }
Always place the $match as early in the aggregation pipeline as possible. Because $match limits the total number of documents in the aggregation pipeline, earlier $match operations minimize the amount of processing down the pipe.
So your query would be like
db.products.aggregate([
{
$match: {
modifiedAt: {
$lte: ISODate("2016-06-18T13:05:18.961Z")
}
}
},
{
$group: {
_id: '$merchant'
}
}
])

Get lowest per date from multiple arrays in mongodb

I've the following structure of docs:
{
"_id" : ObjectId("5786458371d24d924d8b4575"),
"uniqueNumber" : "3899822714",
"lastUpdatedAt" : ISODate("2016-07-13T20:11:11.000Z"),
"new" : [
{
"price" : 8.4,
"created" : ISODate("2016-07-13T13:11:28.000Z")
},
{
"price" : 10.0,
"created" : ISODate("2016-07-13T14:50:56.000Z")
}
],
"used" : [
{
"price" : 10.99,
"created" : ISODate("2016-07-08T13:46:31.000Z")
},
{
"price" : 8.59,
"created" : ISODate("2016-07-13T13:11:28.000Z")
}
]
}
Now I need to get a list that gives me the lowest price of each array per date.
So, as example:
{
"uniqueNumber" : 1234,
"prices" : {
"created" : 2016-07-08,
"minNew" : 123,
"minUsed" : 22
}
}
By now I've built the following query
db.getCollection('col').aggregate([
{
$match : {
"uniqueNumber" : "3899822714"
}
},
{
$unwind : "$used"
},
{
$project : {
"uniqueNumber" : "$uniqueNumber",
"price" : "$used.price",
"ts" : "$used.created"
}
},
{
$sort : { "ts" : 1 }
},
{
$group : {_id: "$uniqueNumber", priceOfMaxTS : { $min: "$price" }, ts : { $last: "$ts" }}
}
]);
But this one will only give me the lowest price for the highest date. I couldn't really find anything that pushes me to the right direction to get the desired result.
UPDATE
I've found a way to get the lowest price of the used array grouped by day with this query:
db.getCollection('col').aggregate([
{
$match : {
"uniqueNumber" : "3899822714"
}
},
{
$unwind : "$used"
},
{
$project : {
"asin" : "$uniqueNumber",
"price" : "$used.price",
"ts" : "$used.created",
"y" : { "$year" : "$used.created" },
"m" : { "$month" : "$used.created" },
"d" : { "$dayOfMonth" : "$used.created" }
}
},
{
$group : { _id : { "year" : "$y", "month" : "$m", "day" : "$d" }, minPriceOfDay : { $min: "$price" }}
}
]);
No I only need to find a way to do this also to the new array in the same query.