MongoDB query to include count of most frequent values for multiple fields - mongodb

Thank you in advance for any help!
I've a collection QR with schema similar to this:
// Schema for the QR collection. One QR document has many Datas documents,
// linked through the shared `qrId` value.
const qrSchema = new Schema({
  qrId: { type: String, index: true },
  owner: { type: Schema.Types.ObjectId, ref: 'User' },
  qrName: { type: String },
  qrCategory: { type: String, index: true },
  shortUrl: { type: String },
});
And collection Datas similar to this:
// Schema for the Datas collection. Many Datas documents point at one QR
// document through `qrId`.
const dataSchema = new Schema({
  qrId: { type: String, index: true },
  city: { type: String },
  device: { type: String },
  date: { type: Date, index: true },
});
The relation between QR and Datas is 1-to-many.
I've an aggregate like this:
// Returns up to 10 QR documents owned by `user`, each with its Datas documents
// dated within [date.start, date.end], ordered by how many such documents it has.
Model.QR.aggregate([
// Stage 1: keep this owner's QRs; the category filter applies only when `category` is set.
{ $match: {
$and: [
{ owner: mongoose.Types.ObjectId(user._id) },
{
$expr: {
$cond: [
// A null, empty or literal "undefined" category means "do not filter by category".
{ $in: [ category, [ null, "", "undefined" ]] },
true,
{ $eq: [ "$qrCategory", category ] }
]
}
}
]
}
},
// Stage 2: attach every Datas document sharing the same qrId as the array `data`.
{ $lookup:
{
"from": "datas",
"localField": "qrId",
"foreignField": "qrId",
"as": "data"
}
},
// Stage 3: keep only the QR fields needed and drop joined items outside the date range.
{
$project: {
_id: 0,
qrId: 1,
qrName: 1,
qrCategory: 1,
shortUrl: 1,
data: {
$filter: {
input: "$data",
as: "item",
cond: {
$and: [
{ $gte: [ "$$item.date", date.start ] },
{ $lte: [ "$$item.date", date.end ] }
] }
}
}
}
},
// Stage 4: group by the QR fields; each input document pushes one
// { dataItems, count } element into `data`.
{
$group: {
_id: { "qrId": "$qrId", "qrName": "$qrName", "qrCategory": "$qrCategory", "shortUrl": "$shortUrl" },
data: {
$push: {
dataItems: "$data",
count: {
// $ifNull guards $size against a missing `data` array.
$size: { '$ifNull': ['$data', []] }
}
}
}
}
},
// Stage 5: order by the count stored inside the `data` array, largest first.
{
$sort: {
"data.count": -1
}
},
// Stage 6: keep the first 10 groups.
{
$limit: 10,
}]).exec((err, results) => { })
Which is returning something like:
[
{
"_id": {
"qrId": "0PRA",
"qrName": "Campaign 0PRA",
"qrCategory": "html",
"shortUrl": "http://someurl.com/0PRA"
},
"data": [
{
"dataItems": [
{
"_id": "6200f2a8c0cf7a1c49233c7f",
"qrId": "0PRA",
"device": "iOS",
"city": "Beijing",
},
{
"_id": "6200f2eac0cf7a1c49233c80",
"qrId": "0PRA",
"device": "AndroidOS",
"city": "Beijing",
},
{
"_id": "6200f3a4c0cf7a1c49233c81",
"qrId": "0PRA",
"device": "AndroidOS",
"city": "Beijing",
},
{
"_id": "6200f632c0cf7a1c49233c88",
"qrId": "0PRA",
"device": "AndroidOS",
"city": "Nanchang",
},
{
"_id": "6201b342c0cf7a1c49233caa",
"qrId": "0PRA",
"device": "iOS",
"city": "Taizhou",
}
],
"count": 5
}
]
},
{
"_id": {
"qrId": "NQ17",
"qrName": "Campaign NQ17",
"qrCategory": "menu",
"shortUrl": "http://someurl.com/NQ17"
},
"data": [
{
"dataItems": [
{
"_id": "6200f207c0cf7a1c49233c7a",
"qrId": "NQ17",
"device": "iOS",
"city": "Singapore"
},
{
"_id": "8200f207c1cf7a1c49233c7a",
"qrId": "NQ17",
"device": "iOS",
"city": "Singapore"
},
{
"_id": "6200ac5db44f23b9ec2b6040",
"qrId": "NQ17",
"device": "AndroidOS",
"city": "San Antonio"
}
],
"count": 3
}
]
}
]
I'm trying to include the most frequent device and city in the results after the count of dataItems, like this:
[
{
"_id": {
"qrId": "0PRA",
"qrName": "Campaign 0PRA",
"qrCategory": "html",
"shortUrl": "http://someurl.com/0PRA"
},
"data": [
{
"dataItems": [
{
"_id": "6200f2a8c0cf7a1c49233c7f",
"qrId": "0PRA",
"device": "iOS",
"city": "Beijing",
},
{
"_id": "6200f2eac0cf7a1c49233c80",
"qrId": "0PRA",
"device": "AndroidOS",
"city": "Beijing",
},
{
"_id": "6200f3a4c0cf7a1c49233c81",
"qrId": "0PRA",
"device": "AndroidOS",
"city": "Beijing",
},
{
"_id": "6200f632c0cf7a1c49233c88",
"qrId": "0PRA",
"device": "AndroidOS",
"city": "Nanchang",
},
{
"_id": "6201b342c0cf7a1c49233caa",
"qrId": "0PRA",
"device": "iOS",
"city": "Taizhou",
}
],
"count": 5,
"topDevice": "AndroidOS", // <---- trying to add this
"topLocation": "Beijing" // <---- trying to add this
}
]
},
{
"_id": {
"qrId": "NQ17",
"qrName": "Campaign NQ17",
"qrCategory": "menu",
"shortUrl": "http://someurl.com/NQ17"
},
"data": [
{
"dataItems": [
{
"_id": "6200f207c0cf7a1c49233c7a",
"qrId": "NQ17",
"device": "iOS",
"city": "Singapore"
},
{
"_id": "8200f207c1cf7a1c49233c7a",
"qrId": "NQ17",
"device": "iOS",
"city": "Singapore"
},
{
"_id": "6200ac5db44f23b9ec2b6040",
"qrId": "NQ17",
"device": "android",
"city": "San Antonio"
}
],
"count": 3,
"topDevice": "iOS", // <---- trying to add this
"topLocation": "Singapore" // <---- trying to add this
}
]
}
]
Is this possible?
Thank you very much in advance for any help or hints!

Method 1
Using $function makes this much easier (requires MongoDB 4.4 or later).
Sort function in js
// Body shared by both $function calls below. It returns the most frequent
// value in `arr`:
//   - one counting pass plus one selection pass, instead of re-filtering the
//     whole array inside a sort comparator;
//   - does not reorder the input array;
//   - on a tie, the value whose occurrence comes last in `arr` wins (the same
//     result a stable sort-then-pop gives);
//   - returns undefined for an empty array and null when `arr` is not an array.
// Kept as a string because $function accepts the body as a String (or BSON Code).
const mostFrequentBody = `function (arr) {
  if (!Array.isArray(arr)) { return null; }
  var counts = new Map();
  arr.forEach(function (v) { counts.set(v, (counts.get(v) || 0) + 1); });
  var best;
  var bestCount = 0;
  arr.forEach(function (v) {
    var c = counts.get(v);
    if (c >= bestCount) { best = v; bestCount = c; }
  });
  return best;
}`;

// Adds topDevice / topLocation to every element of the existing `data` array.
db.collection.aggregate([
  {
    $set: {
      data: {
        $map: {
          input: "$data",
          as: "d",
          in: {
            count: "$$d.count",
            dataItems: "$$d.dataItems",
            topDevice: {
              $function: {
                body: mostFrequentBody,
                // "$$d.dataItems.device" resolves to the array of device values.
                args: ["$$d.dataItems.device"],
                lang: "js",
              },
            },
            topLocation: {
              $function: {
                body: mostFrequentBody,
                args: ["$$d.dataItems.city"],
                lang: "js",
              },
            },
          },
        },
      },
    },
  },
])
mongoplayground
Method 2
// Builds a $facet sub-pipeline that yields the single most frequent value of
// `path`: count per value, sort by count descending, keep the first.
const topValueOf = (path) => [
  { $group: { _id: path, sum: { $sum: 1 } } },
  { $sort: { sum: -1 } },
  { $limit: 1 },
];

db.qr.aggregate([
  { $match: { owner: { $in: ["1", "2"] } } },
  {
    // Join the data documents for each QR, restricted to the date window, and
    // compute the per-QR top device / city inside the lookup itself.
    $lookup: {
      from: "data",
      localField: "qrId",
      foreignField: "qrId",
      as: "data",
      pipeline: [
        {
          $match: {
            $and: [
              { date: { $gte: ISODate("2021-09-01T01:23:25.184Z") } },
              { date: { $lte: ISODate("2021-09-02T11:23:25.184Z") } },
            ],
          },
        },
        {
          $facet: {
            deviceGroup: topValueOf("$device"),
            cityGroup: topValueOf("$city"),
            // Empty sub-pipeline: passes the matched documents through unchanged.
            all: [],
          },
        },
      ],
    },
  },
  {
    // $facet emits a single document, so `data` is a one-element array here;
    // lift the three facet outputs out of it.
    $set: {
      data: { $first: "$data.all" },
      topDevice: { $first: { $first: "$data.deviceGroup._id" } },
      topLocation: { $first: { $first: "$data.cityGroup._id" } },
    },
  },
  {
    $group: {
      _id: {
        qrId: "$qrId",
        qrName: "$qrName",
        qrCategory: "$qrCategory",
        shortUrl: "$shortUrl",
      },
      data: {
        $push: {
          dataItems: "$data",
          topDevice: "$topDevice",
          topLocation: "$topLocation",
          count: { $size: { $ifNull: ["$data", []] } },
        },
      },
    },
  },
])
mongoplayground

Query
Add the $match you need first (I didn't understand what the match should do).
$lookup on qrId.
Filter to keep only documents with start <= date <= end (replace the 1 and 100 with your dates).
$facet to collect all the documents, the topDevice and the topLocation.
$set to bring that data out of the nested locations it is in.
count is added as the size of the all-documents array.
*Maybe I am missing something, but try it (the first part is, I think, like YuTing's answer).
Test code here
// For each QR document: join its Datas documents within the date range and
// expose dataItems, count, topDevice and topLocation as top-level fields.
QR.aggregate(
[{"$lookup":
{"from":"Datas",
"localField":"qrId",
"foreignField":"qrId",
"pipeline":
// 1 and 100 are placeholders for the start and end dates.
[{"$match":{"$and":[{"date":{"$gte":1}}, {"date":{"$lte":100}}]}},
{"$facet":
// Empty sub-pipeline: keeps all matched documents as `dataItems`.
{"dataItems":[],
// Count per device, sort by count descending, keep the first.
"topDevice":
[{"$group":{"_id":"$device", "count":{"$sum":1}}},
{"$sort":{"count":-1}}, {"$limit":1}],
// Same for city.
"topLocation":
[{"$group":{"_id":"$city", "count":{"$sum":1}}},
{"$sort":{"count":-1}}, {"$limit":1}]}}],
"as":"data"}},
// Unwrap the one-element `data` array into a document.
{"$set":{"data":{"$arrayElemAt":["$data", 0]}}},
{"$set":
{"dataItems":"$data.dataItems",
"count":{"$size":"$data.dataItems"},
// Take the `_id` (the grouped value) of the first facet result.
"topDevice":
{"$getField":
{"field":"_id", "input":{"$arrayElemAt":["$data.topDevice", 0]}}},
"topLocation":
{"$getField":
{"field":"_id",
"input":{"$arrayElemAt":["$data.topLocation", 0]}}},
// Drop the intermediate `data` field from the output.
"data":"$$REMOVE"}}])

Related

mongodb $lookup and match for an array item from inside look up

return the order with the user where purchase id: 123, product id: p123 and user and order shipping.mode =2
db={
"orders": [
{
"_id": ObjectId("62155381877d4300196008ef"),
"shipping": {
"mode": 1
},
"products": [],
"user": ObjectId("6186bd3315a342001bd84f42"),
},
{
"_id": ObjectId("6215569b54cc7f0030c44e0f"),
"shipping": {
"mode": 2
},
"user": ObjectId("6186bd3315a342001bd84f43"),
"products": [
{
"id": "p123"
}
],
}
],
"users": [
{
"_id": ObjectId("6186bd3315a342001bd84f43"),
"shipping": {
"mode": 2
},
"name": "user100",
"purchase": [
{
"id": "123"
},
{
"id": "hjhh"
}
],
}
]
}
https://mongoplayground.net/p/IomR8U7Ard-
// Orders joined with their user, one row per user purchase, filtered on
// shipping mode, user shipping mode, purchase id and product id.
db.orders.aggregate([
  {
    $lookup: {
      from: "users",
      localField: "user",
      foreignField: "_id",
      as: "user",
    },
  },
  // `user` becomes a single document, then one row per element of user.purchase.
  { $unwind: "$user" },
  { $unwind: "$user.purchase" },
  // The order itself must have a positive shipping mode.
  { $match: { "shipping.mode": { $gt: 0 } } },
  {
    // All three conditions must hold.
    $match: {
      "user.shipping.mode": { $eq: 2 },
      "user.purchase.id": { $eq: "123" },
      $expr: { $in: ["p123", "$products.id"] },
    },
  },
])

Merge documents from 2 collections in MongoDB & preserve property of a field

I have two collections, 1. temporaryCollection, 2. permanentCollection, I would like to take data from temporaryCollection and update in permanentCollection. To see the expected result see updatedPermanentCollection below.
Fields that are taken from Temporary collection and updated in Permanent collection are:
emailAddresses
phoneNumbers
ContactName
ContactNumber
For your info, the fields that are changed in Temporary collection
contacts[0]['emailAddresses']
contacts[0]['ContactName']
contacts[0]["phoneNumbers"]
contacts[0]["ContactNumber"]
The field that should not be changed in updatedPermanentCollection after the update is
contacts._id
Note: contacts is an Array of objects, for simplicity I have shown just one object.
I am currently using the below query which updates the permanentCollection but also overrides the contacts._id field. I don't want the contacts._id field to be overridden.
Here is my MongoDB Query
// Copies the matching temporaryCollection document into permanentCollection.
db.temporaryCollection.aggregate([
// Select the temporary document for one user.
{
$match: {
userID: ObjectId("61d1efea2c0fab00340f47c8"),
},
},
// Match on userID; "merge" overwrites every field present in the incoming
// document, including the whole `contacts` array and therefore contacts._id.
{
$merge: {
into: "permanentCollection",
on: "userID",
whenMatched: "merge",
whenNotMatched: "insert",
},
},
]);
1. temporaryCollection
{
"_id": { "$oid": "61d1f04266289f003452d705" },
"userID": { "$oid": "61d1efea2c0fab00340f47c8" },
"contacts": [
{
"emailAddresses": [
{ "id": "6884", "label": "email1", "email": "addedemail#gmail.com" }
],
"phoneNumbers": [
{
"label": "other",
"id": "4594",
"number": "+918984292930"
},
{
"label": "other",
"id": "4595",
"number": "+911234567890"
}
],
"_id": { "$oid": "61d1f04266289f003452d744" },
"ContactName": "Sample User 1 Name Changed",
"ContactNumber": "+918984292930",
"recordID": "833"
}
],
"userNumber": "+911234567890",
"__v": 7
}
2. permanentCollection
{
"_id": { "$oid": "61d1f04266289f003452d701" },
"userID": { "$oid": "61d1efea2c0fab00340f47c8" },
"contacts": [
{
"emailAddresses": [],
"phoneNumbers": [
{
"label": "other",
"id": "4594",
"number": "+918984292929"
},
{
"label": "other",
"id": "4595",
"number": "+911234567890"
}
],
"_id": { "$oid": "61d1f04266289f003452d722" },
"ContactName": "Sample User 1",
"ContactNumber": "+918984292929",
"recordID": "833"
}
],
"userNumber": "+911234567890",
"__v": 7
}
3. updatedPermanentCollection (Expected result)
{
"_id": { "$oid": "61d1f04266289f003452d701" },
"userID": { "$oid": "61d1efea2c0fab00340f47c8" },
"contacts": [
{
"emailAddresses": [
{ "id": "6884", "label": "email1", "email": "addedemail#gmail.com" }
],
"phoneNumbers": [
{
"label": "other",
"id": "4594",
"number": "+918984292930"
},
{
"label": "other",
"id": "4595",
"number": "+911234567890"
}
],
"_id": { "$oid": "61d1f04266289f003452d722" },
"ContactName": "Sample User 1 Name Changed",
"ContactNumber": "+918984292930",
"recordID": "833"
}
],
"userNumber": "+911234567890",
"__v": 7
}
Try with this aggregation query.
// Updates permanentCollection from temporaryCollection while keeping the
// contacts._id (and recordID) values already stored in permanentCollection.
// Contacts are paired between the two collections by `recordID`.
// NOTE: $merge with `on: "userID"` requires a unique index on userID in the
// target collection.
db.temporaryCollection.aggregate([
  {
    // Fetch the permanent document for the same user.
    $lookup: {
      from: "permanentCollection",
      let: { user_id: "$userID" },
      pipeline: [
        { $match: { $expr: { $eq: ["$$user_id", "$userID"] } } },
      ],
      as: "pcontacts",
    },
  },
  {
    // Keep temporary documents that have no permanent counterpart yet.
    $unwind: {
      path: "$pcontacts",
      preserveNullAndEmptyArrays: true,
    },
  },
  {
    // Pair every temporary contact with the permanent contact that has the
    // same recordID (if any).
    $project: {
      contacts: {
        $map: {
          input: "$contacts",
          as: "contact",
          in: {
            tcontact: "$$contact",
            pcontact: {
              $first: {
                $filter: {
                  input: "$pcontacts.contacts",
                  as: "pcontact",
                  cond: { $eq: ["$$pcontact.recordID", "$$contact.recordID"] },
                },
              },
            },
          },
        },
      },
      userNumber: 1,
      userID: 1,
      // Drop the temporary document's _id so it cannot overwrite the permanent one.
      _id: 0,
    },
  },
  {
    // Take the changed fields from the temporary contact; keep recordID and
    // _id from the permanent contact when one was found.
    $project: {
      contacts: {
        $map: {
          input: "$contacts",
          as: "contact",
          in: {
            emailAddresses: "$$contact.tcontact.emailAddresses",
            phoneNumbers: "$$contact.tcontact.phoneNumbers",
            ContactName: "$$contact.tcontact.ContactName",
            ContactNumber: "$$contact.tcontact.ContactNumber",
            recordID: {
              $cond: {
                if: "$$contact.pcontact.recordID",
                then: "$$contact.pcontact.recordID",
                else: "$$contact.tcontact.recordID",
              },
            },
            _id: {
              $cond: {
                if: "$$contact.pcontact._id",
                then: "$$contact.pcontact._id",
                else: "$$contact.tcontact._id",
              },
            },
          },
        },
      },
      userNumber: 1,
      userID: 1,
    },
  },
  {
    // "merge" (not "replace") keeps fields the pipeline does not output,
    // such as __v, on the existing permanent document.
    $merge: {
      into: "permanentCollection",
      on: "userID",
      whenMatched: "merge",
      whenNotMatched: "insert",
    },
  },
])
It is not a fully optimized query but it works.
Try to add $unset to db query.
// Merge one user's temporary document into permanentCollection without
// carrying over the temporary document's top-level _id.
db.temporaryCollection.aggregate([
  // Select the user's document.
  { $match: { userID: ObjectId("61d1efea2c0fab00340f47c8") } },
  // Remove the top-level _id before merging.
  { $unset: "_id" },
  {
    $merge: {
      into: "permanentCollection",
      on: "userID",
      whenMatched: "merge",
      whenNotMatched: "insert",
    },
  },
]);

Group and aggregate on sub document in mongodb

OBS! Noob question probably :)
Given the following data, how can I query and return a summary for each index?
[
{
"title": "test",
"indexes":[
{ "id":1, "value": 0.5764860139860139860139860140 },
{ "id":2, "value": 0.3083479020979020979020979020 },
{ "id":3, "value": 0.1151660839160839160839160838 }
]
},
{
"title": "test",
"indexes":[
{ "id":1, "value": 0.5764860139860139860139860140 },
{ "id":2, "value": 0.3083479020979020979020979020 },
{ "id":3, "value": 0.1151660839160839160839160838 }
]
},
{
"title": "test",
"indexes":[
{ "id":1, "value": 0.5764860139860139860139860140 },
{ "id":2, "value": 0.3083479020979020979020979020 },
{ "id":3, "value": 0.1151660839160839160839160838 }
]
},
{
"title": "test",
"indexes":[
{ "id":1, "value": 0.5764860139860139860139860140 },
{ "id":2, "value": 0.3083479020979020979020979020 },
{ "id":3, "value": 0.1151660839160839160839160838 }
]
}
]
I.e. I want to produce something like this:
index.id:1, total: 2.305...
index.id:2, total: 1.233...
etc
// Sum `indexes.value` per `indexes.id` across all documents.
db.collection.aggregate([
  // One output document per element of the `indexes` array.
  { $unwind: { path: "$indexes" } },
  // Group the unwound elements by id and add up their values.
  { $group: { _id: "$indexes.id", total: { $sum: "$indexes.value" } } },
])
try this query
you will get like this
[
{
"_id": 2,
"total": 1.2333916083916083
},
{
"_id": 1,
"total": 2.305944055944056
},
{
"_id": 3,
"total": 0.4606643356643357
}
]
// Sum `indexes.value` per `indexes.id` across all documents.
db.collection.aggregate([
// One output document per element of the `indexes` array.
{
$unwind: "$indexes"
},
// Group by the element id and total the values.
{
$group: {
_id: "$indexes.id",
total: {
$sum: "$indexes.value"
}
}
}
])
Working Mongo playground

mongodb aggregate lookup with a query

I have collections with following values:
reports
{
"_id": { "$oid": "5f05e1d13e0f6637739e215b" },
"testReport": [
{
"name": "Calcium",
"value": "87",
"slug": "ca",
"details": {
"description": "description....",
"recommendation": "recommendation....",
"isNormal": false
}
},
{
"name": "Magnesium",
"value": "-98",
"slug": "mg",
"details": {
"description": "description....",
"recommendation": "recommendation....",
"isNormal": false
}
}
],
"patientName": "Patient Name",
"clinicName": "Clinic",
"gender": "Male",
"bloodGroup": "A",
"createdAt": { "$date": "2020-07-08T15:10:09.612Z" },
"updatedAt": { "$date": "2020-07-08T15:10:09.612Z" }
},
setups
{
"_id": { "$oid": "5efcba7503f4693d164e651d" },
"code": "Ca",
"codeLower": "ca",
"name": "Calcium",
"valueFrom": -75,
"valueTo": -51,
"treatmentDescription": "description...",
"isNormal": false,
"gender": "",
"recommendation": "recommendation...",
"createdAt": { "$date": "2020-07-01T16:31:50.205Z" },
"updatedAt": { "$date": "2020-07-01T16:31:50.205Z" }
},
{
"_id": { "$oid": "5efcba7503f4693d164e651e" }, // <=== should find this for Calcium
"code": "Ca",
"codeLower": "ca",
"name": "Calcium",
"valueFrom": 76,
"valueTo": 100,
"treatmentDescription": "description...",
"isNormal": false,
"gender": "",
"recommendation": "recommendation...",
"createdAt": { "$date": "2020-07-01T16:31:50.205Z" },
"updatedAt": { "$date": "2020-07-01T16:31:50.205Z" }
},
{
"_id": { "$oid": "5efcba7603f4693d164e65bb" }, // <=== should find this for Magnesium
"code": "Mg",
"codeLower": "mg",
"name": "Magnesium",
"valueFrom": -100,
"valueTo": -76,
"treatmentDescription": "description...",
"isNormal": false,
"gender": "",
"recommendation": "recommendation...",
"createdAt": { "$date": "2020-07-01T16:31:50.205Z" },
"updatedAt": { "$date": "2020-07-01T16:31:50.205Z" }
},
{
"_id": { "$oid": "5efcba7503f4693d164e6550" },
"code": "Mg",
"codeLower": "mg",
"name": "Magnesium",
"valueFrom": 76,
"valueTo": 100,
"treatmentDescription": "description...",
"isNormal": false,
"gender": "",
"recommendation": "recommendation...",
"createdAt": { "$date": "2020-07-01T16:31:50.205Z" },
"updatedAt": { "$date": "2020-07-01T16:31:50.205Z" }
}
For each entry in testReport in the reports collection, I want to find the setups document with the same name whose range (valueFrom to valueTo) contains the entry's value, and then store the _ids of the matching setups documents in a setupIds field on the reports document.
I tried with the following aggregation framework:
// Attempt to look up the setups documents matching each testReport entry.
// Only the syntax is corrected here (method name, stray braces in $expr).
// The logic is left as asked about: `$testReport.value` and `$testReport.name`
// resolve to ARRAYS (testReport is an array) and the values are strings, so
// the $eq/$gte/$lte comparisons against scalar setups fields do not match.
db.reports.aggregate([
  {
    $match: {
      _id: new ObjectId('5f05e1d13e0f6637739e215b'),
    },
  },
  {
    $lookup: {
      from: 'setups',
      let: {
        testValue: '$testReport.value',
        testName: '$testReport.name',
      },
      pipeline: [
        {
          $match: {
            $expr: {
              $and: [
                { $eq: ['$name', '$$testName'] },
                { $gte: ['$valueTo', '$$testValue'] },
                { $lte: ['$valueFrom', '$$testValue'] },
              ],
            },
          },
        },
      ],
      as: 'setupIds',
    },
  },
])
This query didn't find the expected results.
This is the updated reports collection I want:
{
"_id": { "$oid": "5f05e1d13e0f6637739e215b" },
"setupIds": [{ "$oid": "5efcba7503f4693d164e651e" }, { "$oid": "5efcba7603f4693d164e65bb" }], // <=== Here, array of the ObjectId (ref: "Setups")
"patientName": "Patient Name",
"clinicName": "Clinic",
"gender": "Male",
"bloodGroup": "A",
"createdAt": { "$date": "2020-07-08T15:10:09.612Z" },
"updatedAt": { "$date": "2020-07-08T15:10:09.612Z" }
},
You can try like following
// Pipeline for the `reports` collection: for every testReport entry, find the
// setups documents with the same name whose [valueFrom, valueTo] range
// contains the entry's value, and collect their _ids into `setupIds`.
[
  { $match: { _id: ObjectId('5f05e1d13e0f6637739e215b') } },
  // One document per testReport entry, so each lookup sees scalar values.
  { $unwind: { path: "$testReport" } },
  {
    $lookup: {
      // The collection is named `setups` (the original `setup` matches nothing).
      from: 'setups',
      let: {
        // Values are stored as strings ("87", "-98"); convert before comparing.
        testValue: { $toInt: '$testReport.value' },
        testName: '$testReport.name',
      },
      pipeline: [
        {
          $match: {
            $expr: {
              $and: [
                { $eq: ["$name", "$$testName"] },
                { $gte: ["$valueTo", "$$testValue"] },
                { $lte: ["$valueFrom", "$$testValue"] },
              ],
            },
          },
        },
      ],
      as: 'setupIds',
    },
  },
  {
    // Re-assemble one document per report; testReport is intentionally dropped.
    $group: {
      _id: "$_id",
      patientName: { $first: "$patientName" },
      clinicName: { $first: "$clinicName" },
      gender: { $first: "$gender" },
      bloodGroup: { $first: "$bloodGroup" },
      createdAt: { $first: "$createdAt" },
      updatedAt: { $first: "$updatedAt" },
      // Each unwound row contributes an array of matched _ids.
      setupIds: { $addToSet: "$setupIds._id" },
    },
  },
  {
    // Flatten the array of arrays into a single array of distinct _ids.
    $addFields: {
      setupIds: {
        $reduce: {
          input: "$setupIds",
          initialValue: [],
          in: { $setUnion: ["$$this", "$$value"] },
        },
      },
    },
  },
]
Working Mongo playground

Need to return matched data from mongo db JSON

I have JSON documents that contain state/city details, i.e. which city belongs to which state.
I need to query them for a particular state name and get back all the cities that belong to that state.
// Matches whole documents with count 10 whose `state` array contains an
// element named "MP"; it returns the full documents, not only the matching cities.
db.collection.find({
"count": 10,
"state.name": "MP"
})
[
{
"collection": "collection1",
"count": 10,
"state": [
{
"name": "MH",
"city": "Mumbai"
},
{
"name": "MH",
"city": "Pune"
},
{
"name": "UP",
"city": "Kanpur"
},
{
"name": "CG",
"city": "Raipur"
}
]
},
{
"collection": "collection2",
"count": 20,
"state": [
{
"name": "MP",
"city": "Indore"
},
{
"name": "MH",
"city": "Bhopal"
},
{
"name": "UP",
"city": "Kanpur"
},
{
"name": "CG",
"city": "Raipur"
}
]
}
]
You have to use aggregate query to get only matching elements in array :
// Returns the distinct cities of one state as a single { cities: [...] } document.
// The sample documents keep `state` at the top level, so the paths use
// `state.*` (the original `content.state.*` paths match nothing in that data).
db.collection.aggregate([
  // Filter documents first so fewer of them get unwound.
  { $match: { count: 10, "state.name": "MH" } },
  // One document per element of the `state` array.
  { $unwind: "$state" },
  // Keep only the elements for the requested state.
  { $match: { "state.name": "MH" } },
  // Collect the distinct city names into one array (order is not guaranteed).
  { $group: { _id: null, cities: { $addToSet: "$state.city" } } },
  { $project: { _id: 0, cities: 1 } },
])
This query will return :
{
"cities": [
"Pune",
"Mumbai"
]
}
The following query would be the solution.
// Dot notation matches documents whose `state` array has ANY element with
// name "MP". Writing `"state": {"name": "MP"}` instead asks for an element
// equal to exactly { name: "MP" }, which never matches elements that also
// carry a `city` field.
// NOTE: in the sample data the only document containing "MP" has count 20,
// so adjust `count` as needed.
db.collection.find({ "count": 10, "state.name": "MP" })
For more complex queries, $elemMatch is also available.