MongoDb how to count fields - mongodb

I have some data like this:
{
user_id:1,
group_id:123,
discription:null
},
{
user_id:1,
group_id:321,
discription:null
},
{
user_id:1,
group_id:123,
discription:"text"
},
{
user_id:1,
group_id:321,
discription:"another text"
},
{
user_id:1,
group_id:321,
discription:"another another text"
},
etc..
I want to get all groups (grouped by group_id), the count of documents in each group, and the count of documents in that group that have "discription" with a null value and with a non-null value.
So i need results like:
[
[group_id:123, count:2, isNull:1, isNotNull:1],
[group_id:321, count:3, isNull:1, isNotNull:2]
]
I know how to group the documents by "group_id" and get the "count", but I don't know how to get the info about "discription".
db.collection.aggregate([
{
$match:{
user_id:1
}
},
{
$group:{
_id:'$group_id',
group_id:{$first:'$group_id'},
count:{'$sum':1}
}
}
])

Please check this Query
// Per group_id (for user 1): total document count, plus how many documents
// have a null/missing "discription" and how many have a value.
db.testing.aggregate([
  // Restrict to a single user's documents before grouping.
  { $match: { user_id: 1 } },
  {
    $group: {
      _id: "$group_id",
      count: { $sum: 1 },
      // $ifNull maps a missing field to null, so missing and explicit null are
      // both counted. The explicit $cond avoids relying on $sum skipping
      // non-numeric values, which would miscount a numeric "discription".
      isNull: {
        $sum: {
          $cond: [{ $eq: [{ $ifNull: ["$discription", null] }, null] }, 1, 0]
        }
      }
    }
  },
  {
    $project: {
      _id: 0,
      group_id: "$_id",
      count: 1,
      isNull: 1,
      // Every document is either null or not null, so the rest are non-null.
      isNotNull: { $subtract: ["$count", "$isNull"] }
    }
  }
])

Related

get document with same 3 fields in a collection

i have a collection with more than 1000 documents, and some documents have the same value in some fields; i need to get those
the collection is:
[{_id,fields1,fields2,fields3,etc...}]
what query can i use to get all the elements that have the same 3 fields for example:
[
{_id:1,fields1:'a',fields2:1,fields3:'z'},
{_id:2,fields1:'a',fields2:1,fields3:'z'},
{_id:3,fields1:'f',fields2:2,fields3:'g'},
{_id:4,fields1:'f',fields2:2,fields3:'g'},
{_id:5,fields1:'j',fields2:3,fields3:'g'},
]
i need to get
[
{_id:2,fields1:'a',fields2:1,fields3:'z'},
{_id:4,fields1:'f',fields2:2,fields3:'g'},
]
in this way i can easily get a list of "duplicates" that i can delete if needed; it's not really important whether i get ids 2 and 4 or 1 and 3,
but 5 should never be included as it's not 'duplicated'
EDIT:
sorry but i forgot to mention that there are some document with null value i need to exclude those
This is a perfect use case for window fields. You can use $setWindowFields to compute $rank within the grouping/partition you want. Then keep the documents whose rank is not equal to 1 to get the duplicates.
// Find duplicate documents: rank the documents inside each
// (fields1, fields2, fields3) partition and keep every one after the first.
db.collection.aggregate([
{
// Skip documents where any of the three key fields is null or missing.
$match: {
fields1: {
$ne: null
},
fields2: {
$ne: null
},
fields3: {
$ne: null
}
}
},
{
// Documents sharing the same three values land in the same partition.
"$setWindowFields": {
"partitionBy": {
fields1: "$fields1",
fields2: "$fields2",
fields3: "$fields3"
},
// Ordering by _id makes the lowest _id in each partition rank 1.
"sortBy": {
"_id": 1
},
"output": {
"duplicateRank": {
"$rank": {}
}
}
}
},
{
// Rank 1 is the document to keep; everything else is a duplicate.
$match: {
duplicateRank: {
$ne: 1
}
}
},
{
// Drop the helper field so the output has the original document shape.
$unset: "duplicateRank"
}
])
Mongo Playground
I think you can try this aggregation query:
First group by the fields you want to check for repeated values.
It creates an array with the _ids that are repeated.
Then get only where there is more than one ($match).
And last project to get the desired output. I've used the first _id found.
// Return one representative document for every (fields1, fields2, fields3)
// combination that occurs more than once.
db.collection.aggregate([
  {
    // Group on the three fields and collect the _ids sharing those values.
    "$group": {
      "_id": {
        "fields1": "$fields1",
        "fields2": "$fields2",
        "fields3": "$fields3"
      },
      "duplicatesIds": {
        "$push": "$_id"
      }
    }
  },
  {
    // Keep only the groups that contain more than one document.
    "$match": {
      "$expr": {
        "$gt": [
          {
            "$size": "$duplicatesIds"
          },
          1
        ]
      }
    }
  },
  {
    // Rebuild the original document shape, using the first collected _id.
    // Each output field reads the group key of the same name.
    "$project": {
      "_id": {
        "$arrayElemAt": [
          "$duplicatesIds",
          0
        ]
      },
      "fields1": "$_id.fields1",
      "fields2": "$_id.fields2",
      "fields3": "$_id.fields3"
    }
  }
])
Example here

MongoDB query $group is returning null

I am learning MongoDb query and my requirement is to calculate the average time between two dates. I wrote a mongoDB query with project and group stages.
{
project: {
OrderObject:1,
date:1
}
},
{
group: {
objectId: '$OrderObject.pharmacy.companyName',
count: {
$sum: 1
},
duration: {
$avg: {
$abs: {
$divide: [
{
$subtract: [
{
$arrayElemAt: [
'$date',
0
]
},
{
$arrayElemAt: [
'$date',
1
]
}
]
},
60000
]
}
}
},
OrderIDs: {
$addToSet: '$OrderObject.orderID'
},
pharmacyName: {
$addToSet: '$OrderObject.pharmacy.companyName'
},
}
}
The output I get is
{
count: 3,
duration: 54.53004444444445,
OrderIDs: [ 'ABCDE', 'EWQSE', 'ERTRE' ],
pharmacyName: [ 'pharmacy business Name' ],
objectId: null
},
Can someone please tell me why objectId is null in this case but the value is printed in pharmacyName field. I am using this pipeline in parse server as query.aggregate(pipeline, {useMasterKey:true})
My expectation is pharmacyName === objectId
Most probably your nested element here is with different name:
OrderObject.name.companyName
but this is not an issue for the $group stage, since it aggregates over all elements (3 in total) in the collection when the _id is null and does not give you any errors ...
It is also interesting why, in your output, the "pharmacyName" appears simply as "name"? ;)

If condition in MongoDB for Nested JSON to retrieve a particular value

I have nested JSON like this. I want to retrieve the value of "_value" at the second level, i.e. "Living Organisms". This is my JSON document.
{
"name": "Biology Book",
"data": {
"toc": {
"_version": "1",
"ge": [
{
"_name": "The Fundamental Unit of Life",
"_id": "5a",
"ge": [
{
"_value": "Living Organisms",
"_id": "5b"
}
]
}
]
}
}
}
This is what I've tried; using the "_id", I want to retrieve its "_value"
db.products.aggregate([{"$match":{ "data.toc.ge.ge._id": "5b"}}])
This is the closest I could get to the output you mentioned in the comment above. Hope it helps.
db.collection.aggregate([
{
$match: {
"data.toc.ge.ge._id": "5b"
}
},
{
$unwind: "$data.toc.ge"
},
{
$unwind: "$data.toc.ge.ge"
},
{
$group: {
_id: null,
book: {
$push: "$data.toc.ge.ge._value"
}
}
},
{
$project: {
_id: 0,
first: {
$arrayElemAt: [
"$book",
0
]
},
}
}
])
Output:
[
{
"first": "Living Organisms"
}
]
You can check what I tried here
If you are using Mongoid:
(1..6).inject(Model.where('data.toc.ge.ge._id' => '5b').pluck('data.toc.ge.ge._value').first) { |v| v.values.first rescue v.first rescue v }
# => "Living Organisms"
6 is the number of containers to trim from the output (4 hashes and 2 arrays).
If I understand your question correctly, you only care about _value, so it sounds like you might want to use a projection:
db.products.aggregate([{"$match":{ "data.toc.ge.ge._id": "5b"}}, { "$project": {"data.toc.ge.ge._value": 1}}])

get the document id and default value from mongo aggregation function

Here is my sample document structure
{
id: "abc"
config: [
{
feature_id: f1,
properties: [
{
id: 1
todo: "check the microwave"
}
{
id: 2
todo: "check the food"
}
]
},
{
feature_id: f2,
}
]
}
I am using mongoDb aggregation function to get todo based on feature_id.
I tried multiple solutions, but they all seem to fail
db.getCollection('foo').aggregate([
{$match: {
"id": "abc",
"config": {
$elemMatch: {
"feature_id": "f1"
}
}
}
},
{
$project : { "config": 1, "_id": 0}
},
{
$unwind: "$configurations"
},
]);
Though I am getting a result, it is far from satisfactory.
the expected output
[{id: 1 , todo: "check the microwave" }]
You can use any of the aggregation queries below in 3.4.
db.getCollection('foo').aggregate([
{"$match":{"id":"abc","config.feature_id":"f1"}},
{"$unwind":"$config"},
{"$match":{"config.feature_id":"f1"}},
{"$unwind":"$config.properties"},
{"$match":{"config.properties.id":1}},
{"$project":{"data":"$config.properties"}},
{"$replaceRoot":{"newRoot":"$data"}}
])
OR
db.getCollection('foo').aggregate([
{"$match":{"id":"abc","config.feature_id":"f1"}},
{"$project":{
"property":{
"$arrayElemAt":[
{"$filter":{
"input":{
"$let":{
"vars":{
"config":{
"$arrayElemAt":[
{"$filter":{
"input":"$config",
"as":"cf",
"cond":{"$eq":["$$cf.feature_id","f1"]}
}},
0
]
}
},
"in":"$$config.properties"
}
},
"as":"pf",
"cond":{"$eq":["$$pf.id",1]}
}},0]}
}},
{"$replaceRoot":{"newRoot":"$property"}}
])
Here is another solution
// Return the single to-do entry (id 1) of feature "f1" for document "abc".
db.getCollection('foo').aggregate([
  // Cheap pre-filter on the whole document before any $unwind.
  {"$match":{"id":"abc","config.feature_id":"f1"}},
  {"$unwind":"$config"},
  // After unwinding, keep only the config entry for the wanted feature.
  {"$match":{"config.feature_id":"f1"}},
  {"$unwind":"$config.properties"},
  {"$match":{"config.properties.id":1}},
  // Emit the requested shape {id, todo}; suppress the document's own _id.
  {"$project":{"_id":0,"id":"$config.properties.id","todo":"$config.properties.todo"}}
])

Find or forEach Inside Aggregate MongoDB

Let's say I have two collections in my database called rumahsakit.
First collection called dim_dokter:
[{
"_id": ObjectId("58b22c79e8c1c52bf3fad997"),
"nama_dokter": "Dr. Basuki Hamzah",
"spesialisasi": "Spesialis Farmakologi Klinik",
"alamat": " Jalan Lingkar Ring Road Utara, Yogyakarta "
}, {
"_id": ObjectId("58b22c79e8c1c52bf3fad998"),
"nama_dokter": "Dr. Danie Nukman",
"spesialisasi": "Spesialis Anak",
"alamat": " Jalan Sudirman, Yogyakarta "
}, {
"_id": ObjectId("58b22c79e8c1c52bf3fad999"),
"nama_dokter": "Dr. Bambang Kurnia",
"spesialisasi": "Spesialis Mikrobiologi Klinik",
"alamat": " Jalan Ahmad Yani, Yogyakarta "
}]
Second collection called fact_perawatan:
[{
"_id": ObjectId("58b22d13e8c1c52bf3fad99a"),
"nama_pasien": "Clark",
"detail_perawatan": [{
"id_dokter": ObjectId("58b22c79e8c1c52bf3fad997"),
"jumlah_obat": 1
}, {
"id_dokter": ObjectId("58b22c79e8c1c52bf3fad998"),
"jumlah_obat": 1
}]
}]
The fact_perawatan collection has an id_dokter field that points to dim_dokter._id. I want to run an aggregation that shows the data in the fact_perawatan collection, but instead of showing just the id_dokter, I want to show the nama_dokter from dim_dokter.
This is my code so far:
db.fact_perawatan.aggregate([
{
$match:
{
'_id': db.fact_perawatan.find({"_id" : ObjectId("58b22d13e8c1c52bf3fad99a")})[0]._id
}
},
{
$project:
{
nama_pasien: db.fact_perawatan.find({"_id": ObjectId("58b22d13e8c1c52bf3fad99a")})[0].nama_pasien,
perawatan: [
{
dokter: db.dim_dokter.find({"_id" : db.fact_perawatan.find({"_id": ObjectId("58b22d13e8c1c52bf3fad99a")})[0].detail_perawatan[0].id_dokter})[0].nama_dokter,
},
]
}
}
])
result:
{ "_id" : ObjectId("58b22d13e8c1c52bf3fad99a"), "nama_pasien" : "Clark", "perawatan" : [ { "dokter" : "Dr. Basuki Hamzah" } ] }
This code can get the nama_dokter from dim_dokter, but only for one entry. In my case, there can be up to 5 entries, and repeating detail_perawatan[0] to [5] is not a solution.
So, this code:
db.dim_dokter.find({"_id" : db.fact_perawatan.find({"_id": ObjectId("58b22d13e8c1c52bf3fad99a")})[0].detail_perawatan[0].id_dokter})[0].nama_dokter,
How can I make the code above loop over all the entries, so I can get all the data?
Thanks
You can use a $lookup to left join with dim_dokter, followed by a $group to regroup your data in the perawatan field:
// For one fact_perawatan document, replace each detail_perawatan.id_dokter
// with the matching doctor's nama_dokter from dim_dokter.
db.fact_perawatan.aggregate([{
$match: {
'_id': ObjectId("58b22d13e8c1c52bf3fad99a")
}
}, {
// One document per detail_perawatan entry, so each can be joined on its own.
$unwind: "$detail_perawatan"
}, {
// Left join: the matching dim_dokter documents arrive as an array.
$lookup: {
from: "dim_dokter",
localField: "detail_perawatan.id_dokter",
foreignField: "_id",
as: "dim_dokter"
}
}, {
// Flatten the joined array; entries with no matching doctor are dropped.
$unwind: "$dim_dokter"
}, {
// Reassemble a single document per patient, collecting the doctor names.
$group: {
_id: "$_id",
nama_pasien: {
$first: "$nama_pasien"
},
perawatan: {
$push: {
"dokter": "$dim_dokter.nama_dokter"
}
}
}
}])
The $unwind is used to deconstruct the array field detail_perawatan to be able to $lookup afterwards