MongoDB count distinct items - mongodb

I have following query on a list with this fields : key,time,p,email
use app_db;
db.getCollection("app_log").aggregate(
[
{
"$match" : {
"key" : "login"
}
},
{
"$group" : {
"_id" : {
"$substr" : [
"$time",
0.0,
10.0
]
},
"total" : {
"$sum" : "$p"
},
"count" : {
"$sum" : 1.0
}
}
}
]
);
and the output is something like this :
{
"_id" : "2019-08-25",
"total" : NumberInt(623),
"count" : 400.0
}
{
"_id" : "2019-08-24",
"total" : NumberInt(2195),
"count" : 1963.0
}
{
"_id" : "2019-08-23",
"total" : NumberInt(1294),
"count" : 1706.0
}
{
"_id" : "2019-08-22",
"total" : NumberInt(53),
"count" : 1302.0
}
But I need the count to be distinctive on email field, which is count number of distinct email addresses who logged in per day and their p value is greater 0

You need $addToSet to get an array of unique email values per day and then you can use $size to count the number of items in that array:
db.getCollection("app_log").aggregate(
[
{
"$match" : {
"key" : "login"
}
},
{
"$group" : {
"_id" : {
"$substr" : [
"$time",
0.0,
10.0
]
},
"total" : {
"$sum" : "$p"
},
"emails" : {
"$addToSet": "$email"
}
}
},
{
$project: {
_id: 1,
total: 1,
countDistinct: { $size: "$emails" }
}
}
]
);

Related

How to write mongo query

How I can get the total number of seats available for a particular movie (seats present in all the theatres for that movie) from the mongodb schema below.
I need to write a mongo query to get the results
{
"_id" : ObjectId("5d637b5ce27c7d60e5c42ae7"),
"name" : "Bangalore",
"movies" : [
{
"name" : "KGF",
"theatres" : [
{
"name" : "PVR",
"seats" : 45
},
{
"name" : "IMAX",
"seats" : 46
}
]
},
{
"name" : "Avengers",
"theatres" : [
{
"name" : "IMAX",
"seats" : 50
}
]
}
],
"_class" : "com.BMS_mongo.ZZ_BMS_mongo_demo.Entity.CityInfo"
}
I have written this code :
db.cities.aggregate( [
{ "$unwind" : "$movies" }, { "$unwind" : "$theatres" } ,
{ "$group" : { _id : "$movies.theatre`enter code here`s.seats" ,
total : { "$sum" : "$seats" } }
}
] )
My schema:
The following query can get us the expected output:
db.collection.aggregate([
{
$unwind:"$movies"
},
{
$unwind:"$movies.theatres"
},
{
$group:{
"_id":"$movies.name",
"movie":{
$first:"$movies.name"
},
"totalSeats":{
$sum:"$movies.theatres.seats"
}
}
},
{
$project:{
"_id":0
}
}
]).pretty()
Data set:
{
"_id" : ObjectId("5d637b5ce27c7d60e5c42ae7"),
"name" : "Bangalore",
"movies" : [
{
"name" : "KGF",
"theatres" : [
{
"name" : "PVR",
"seats" : 45
},
{
"name" : "IMAX",
"seats" : 46
}
]
},
{
"name" : "Avengers",
"theatres" : [
{
"name" : "IMAX",
"seats" : 50
}
]
}
],
"_class" : "com.BMS_mongo.ZZ_BMS_mongo_demo.Entity.CityInfo"
}
Output:
{ "movie" : "Avengers", "totalSeats" : 50 }
{ "movie" : "KGF", "totalSeats" : 91 }
Query:
db.movie.aggregate([{ $unwind: { path: "$movies",} },
{ $unwind: { path: "$movies.theatres",} },
{ $group: { _id: "$movies.name", "moviename": { $first: "$movies.name" },
"totalSeats": { $sum: "$movies.theatres.seats" }} }])
I got the answer using this query ...
db.cities.aggregate( [
{ "$match" : { "name" : "Bangalore" } },
{ "$unwind" : "$movies" } ,
{ "$match" : {"movies.name" : "KGF"} },
{ "$unwind" : "$theatres" },
{ "$group" : { _id : "$movies.name", total : { "$sum" : "$movies.theatres.seats"
} } }
] )

In Mongodb How to Give two different $match

In Db I have some sample data:
Object 1
"_id" : ObjectId("5b5934bb49b")
"payment" : {
"paid_total" : 500,
"name" : "havi",
"payment_mode" : "cash",
"pd_no" : "PD20725001",
"invoices" : [
{
"invoice_number" : "IN11803831583"
}
],
"type" : "Payment"
}
Object 2
"_id" : ObjectId("5b5934ee31e"),
"patient" : {
"invoice_date" : "2018-07-26",
"invoiceTotal" : 2000,
"pd_no" : "PD20725001",
"type" : "Invoice",
"invoice_number" : "IN11803831583"
}
Note: All the Data is In same Collection
As the above shown data I have many objects in my database. How can I get the Sum from the data above of invoiceTotal and sum of paid_total and then subtract the paid_total from invoiceTotal and show the balance amount for matching pd_no and invoice_number.
The output I expect looks like
invoiceTotal : 2000
paid_total : 500
Balance : 1500
Sample Input :
{
"_id" : ObjectId("5b596969a88e07f00d6dac17"),
"payment" : {
"paid_total" : 500,
"name" : "havi",
"payment_mode" : "cash",
"pd_no" : "PD20725001",
"invoices" : [
{
"invoice_number" : "IN11803831583"
}
],
"type" : "Payment"
}
}
{
"_id" : ObjectId("5b596986a88e07f00d6dac18"),
"patient" : {
"invoice_date" : "2018-07-26",
"invoiceTotal" : 2000,
"pd_no" : "PD20725001",
"type" : "Invoice",
"invoice_number" : "IN11803831583"
}
}
Use this aggregate query :
db.test.aggregate([
{
$project : {
_id : 0,
pd_no : { $ifNull: ["$payment.pd_no", "$patient.pd_no" ] },
invoice_no : { $ifNull: [ { $arrayElemAt : ["$payment.invoices.invoice_number", 0] },"$patient.invoice_number" ] },
type : { $ifNull: [ "$payment.type", "$patient.type" ] },
paid_total : { $ifNull: [ "$payment.paid_total", 0 ] },
invoice_total : { $ifNull: [ "$patient.invoiceTotal", 0 ] },
}
},
{
$group : {
_id : {
pd_no : "$pd_no",
invoice_no : "$invoice_no"
},
paid_total : {$sum : "$paid_total"},
invoice_total : {$sum : "$invoice_total"}
}
},
{
$project : {
_id : 0,
pd_no : "$_id.pd_no",
invoice_no : "$_id.invoice_no",
invoice_total : "$invoice_total",
paid_total : "$paid_total",
balance : {$subtract : ["$invoice_total" , "$paid_total"]}
}
}
])
In this query we are first finding the pd_no and invoice_no, which we are then using to group the documents. Next, we are getting the invoice_total and paid_total and then subtracting them to get the balance.
Output :
{
"pd_no" : "PD20725001",
"invoice_no" : "IN11803831583",
"invoice_total" : 2000,
"paid_total" : 500,
"balance" : 1500
}
I assume that you will only have documents with invoiceTotal or paid_total and never both at the same time.
you need first to get an amount to get the balance so if paid total it needs to be negative and positive on the case of the invoice total, and you can do this by using first the $project on the pipeline.
collection.aggregate([
{
$project : {
'patient.invoiceTotal': 1,
'payment.paid_total': 1,
ammount: {
$ifNull: ['$patient.invoiceTotal', { $multiply: [-1, '$payment.paid_total']}]
}
}
},
{
$group: {
_id: 'myGroup',
invoiceTotal: { $sum: '$patient.invoiceTotal' },
paid_total: { $sum: '$payment.paid_total' },
balance: { $sum: '$ammount' }
}
}
])

mongodb $unwind empty array

With this data:
{
"_id" : ObjectId("576948b4999274493425c08a"),
"virustotal" : {
"scan_id" : "4a6c3dfc6677a87aee84f4b629303c40bb9e1dda283a67236e49979f96864078-1465973544",
"sha1" : "fd177b8c50b457dbec7cba56aeb10e9e38ebf72f",
"resource" : "4a6c3dfc6677a87aee84f4b629303c40bb9e1dda283a67236e49979f96864078",
"response_code" : 1,
"scan_date" : "2016-06-15 06:52:24",
"results" : [
{
"sig" : "Gen:Variant.Mikey.29601",
"vendor" : "MicroWorld-eScan"
},
{
"sig" : null,
"vendor" : "nProtect"
},
{
"sig" : null,
"vendor" : "CAT-QuickHeal"
},
{
"sig" : "HEUR/QVM07.1.0000.Malware.Gen",
"vendor" : "Qihoo-360"
}
]
}
},
{
"_id" : ObjectId("5768f214999274362f714e8b"),
"virustotal" : {
"scan_id" : "3d283314da4f99f1a0b59af7dc1024df42c3139fd6d4d4fb4015524002b38391-1466529838",
"sha1" : "fb865b8f0227e9097321182324c959106fcd8c27",
"resource" : "3d283314da4f99f1a0b59af7dc1024df42c3139fd6d4d4fb4015524002b38391",
"response_code" : 1,
"scan_date" : "2016-06-21 17:23:58",
"results" : [
{
"sig" : null,
"vendor" : "Bkav"
},
{
"sig" : null,
"vendor" : "ahnlab"
},
{
"sig" : null,
"vendor" : "MicroWorld-eScan"
},
{
"sig" : "Mal/DrodZp-A",
"vendor" : "Qihoo-360"
}
]
}
}
I'm trying to group by and count the vendor when sig is not null in order to obtain something like:
{
"_id" : "Qihoo-360",
"count" : 2
},
{
"_id" : "MicroWorld-eScan",
"count" : 1
},
{
"_id" : "Bkav",
"count" : 0
},
{
"_id" : "CAT-QuickHeal",
"count" : 0
}
At the moment with this code:
db.analysis.aggregate([
{ $unwind: "$virustotal.results" },
{
$group : {
_id : "$virustotal.results.vendor",
count : { $sum : 1 }
}
},
{ $sort : { count : -1 } }
])
I'm getting everything:
{
"_id" : "Qihoo-360",
"count" : 2
},
{
"_id" : "MicroWorld-eScan",
"count" : 2
},
{
"_id" : "Bkav",
"count" : 1
},
{
"_id" : "CAT-QuickHeal",
"count" : 1
}
How can I count 0 if the sig is null?
You need a conditional expression in your $sum operator that will check if the "$virustotal.results.sig" key is null by using the comparison operator $gt (as specified in the documentation's BSON comparsion order)
You can restructure your pipeline by adding this expression as follows:
db.analysis.aggregate([
{ "$unwind": "$virustotal.results" },
{
"$group" : {
"_id": "$virustotal.results.vendor",
"count" : {
"$sum": {
"$cond": [
{ "$gt": [ "$virustotal.results.sig", null ] },
1, 0
]
}
}
}
},
{ "$sort" : { "count" : -1 } }
])
Sample Output
/* 1 */
{
"_id" : "Qihoo-360",
"count" : 2
}
/* 2 */
{
"_id" : "MicroWorld-eScan",
"count" : 1
}
/* 3 */
{
"_id" : "Bkav",
"count" : 0
}
/* 4 */
{
"_id" : "CAT-QuickHeal",
"count" : 0
}
/* 5 */
{
"_id" : "nProtect",
"count" : 0
}
/* 6 */
{
"_id" : "ahnlab",
"count" : 0
}
I changed the null with None and the numbers increased but seems not correct yet.
Basically doing the query in mongoshell I get like
{
"_id" : "Kaspersky",
"count" : 176.0
}
from python:
Kaspersky 64
one of these 2 is wrong :)
So I'm trying to investigate what part of the query in python is not correctly written compared to the mongo shell one.
I did a simple query:
In mongoshell:
rtmp = results_db.analysis.count( { "virustotal.results" : { "$elemMatch" : { "vendor": "Kaspersky", "sig": {"$ne": "null"} } }})
results: 176
db.analysis.count( { "virustotal.results" : { $elemMatch : { "vendor": "Kaspersky", "sig": {$gt: null} } }})
results: 0
Then I tried in python:
rtmp = results_db.analysis.count( { "virustotal.results" : { "$elemMatch" : { "vendor": "Kaspersky", "sig": {"$ne": "null"} } }})
results: 568
rtmp = results_db.analysis.count( { "virustotal.results" : { "$elemMatch" : { "vendor": "Kaspersky", "sig": {"$ne": "None"} } }})
results: 568
rtmp = results_db.analysis.count( { "virustotal.results" : { "$elemMatch" : { "vendor": "Kaspersky", "sig": {"$gt": "None"} } }})
results: 64
rtmp = results_db.analysis.count( { "virustotal.results" : { "$elemMatch" : { "vendor": "Kaspersky", "sig": {"$gt": "null"} } }})
results: 6
hard to says what is the correct value! I suppose 176 but not able to reproduce in python...

How to perform count(column) in mongodb with aggregation?

I am trying to do to the equivalent of the following query in mongodb:
select count(*), count(category), sum(price) from sales group by usergender
Here is what the documents in my collection look like:
{
"_id" : ObjectId("54da8b0aa7c80aed4a9f9f33"),
"userincome" : "$100,000 - $200,000",
"county" : "Los Angeles",
"userstate" : "California",
"usercity" : "Los Angeles",
"price" : 100,
"category" : "Swimwear",
"usergender" : "Male"
}
Here is my aggregation which returns count(*) and sum(price) but I am not sure how to add in count(category).
db['stream.sales'].aggregate([
{
$group:{
_id:"$usergender",
price:{
$sum:"$price"
},
_count:{
$sum:1
}
}
}
])
I know I can run a separate aggregation to get count(category) but I would like to do it in aggregation, because I don't want all my results filtered where category exists = true.
db['stream.sales'].aggregate([
{
$match:{
'category':{
"$exists":true
}
}
},
{
$group:{
_id:"$usergender",
count:{
$sum:1
}
}
}
]);
Edit:
Was able to find the solution with the help of wdberkleys response:
db['stream.sales'].aggregate([
{ "$group" : {
"_id" : "$usergender",
"count" : { "$sum" : 1 },
"price" : { "$sum" : "$price" },
"category" : { "$push" : "$category" }
} },
{ "$project" : {
"count" : 1,
"size" : 1,
"categories" : { "$size" : "$category" }
} }
])
Push the categories to a set during the $group, then $project the size of the resulting set of categories:
db.stream.sales.aggregate([
{ "$group" : {
"_id" : "$usergender",
"count" : { "$sum" : 1 },
"price" : { "$sum" : "$price" },
"categories" : { "$addToSet" : "$category" }
} },
{ "$project" : {
"count" : 1,
"size" : 1,
"categories" : { "$size" : "$category" }
} }
])

Use field value as key

I am doing this query
db.analytics.aggregate([
{
$match: {"event":"USER_SENTIMENT"}
},
{ $group: {
_id: {brand:"$data.brandId",sentiment:"$data.sentiment"},
count: {$sum : 1}
}
},
{ $group: {
_id: "$_id.brand",
sentiments: {$addToSet : {sentiment:"$_id.sentiment", count:"$count"}}
}
}
])
Which generates that :
{
"result" : [
{
"_id" : 57,
"sentiments" : [
{
"sentiment" : "Meh",
"count" : 4
}
]
},
{
"_id" : 376,
"sentiments" : [
{
"sentiment" : "Meh",
"count" : 1
},
{
"sentiment" : "Happy",
"count" : 1
},
{
"sentiment" : "Confused",
"count" : 1
}
]
}
],
"ok" : 1
}
But What I want is that :
[
{
"_id" : 57,
"Meh" : 4
},
{
"_id" : 376,
"Meh" : 1,
"Happy" : 1,
"Confused" : 1
}
]
Any idea on how to transform that? The blocking point for me is to transform a value into a key.