Group and aggregate on sub document in mongodb - mongodb

OBS! Noob question probably :)
Given the following data, how can I query and return a summary for each index?
[
{
"title": "test",
"indexes":[
{ "id":1, "value": 0.5764860139860139860139860140 },
{ "id":2, "value": 0.3083479020979020979020979020 },
{ "id":3, "value": 0.1151660839160839160839160838 }
]
},
{
"title": "test",
"indexes":[
{ "id":1, "value": 0.5764860139860139860139860140 },
{ "id":2, "value": 0.3083479020979020979020979020 },
{ "id":3, "value": 0.1151660839160839160839160838 }
]
},
{
"title": "test",
"indexes":[
{ "id":1, "value": 0.5764860139860139860139860140 },
{ "id":2, "value": 0.3083479020979020979020979020 },
{ "id":3, "value": 0.1151660839160839160839160838 }
]
},
{
"title": "test",
"indexes":[
{ "id":1, "value": 0.5764860139860139860139860140 },
{ "id":2, "value": 0.3083479020979020979020979020 },
{ "id":3, "value": 0.1151660839160839160839160838 }
]
}
]
I.e. I want to produce something like this:
index.id:1, total: 2.305...
index.id:2, total: 1.233...
etc

db.collection.aggregate([
{
"$unwind": "$indexes"
},
{
$group: {
_id: "$indexes.id",
total: {
$sum: "$indexes.value"
}
}
}
])
try this query
you will get like this
[
{
"_id": 2,
"total": 1.2333916083916083
},
{
"_id": 1,
"total": 2.305944055944056
},
{
"_id": 3,
"total": 0.4606643356643357
}
]

db.collection.aggregate([
{
$unwind: "$indexes"
},
{
$group: {
_id: "$indexes.id",
total: {
$sum: "$indexes.value"
}
}
}
])
Working Mongo playground

Related

how to update nested doc in mongo

I need to batch update cdi_tags with md5 collection(push item to cdi_tags) as follows:
db.getCollection("event").update({
"_id": {
$in: [ObjectId("6368f03e21b1ad246c84d67b"), ObjectId("6368f03f21b1ad246c84d982")]
},
"meta": {
$elemMatch: {
"key": "bags",
"value"
}
}
}, {
$addToSet: {
"meta.$.value.$[t].cdi_tags: "test_tag"
}
}, {
arrayFilters: [{
"t": {
$in: ["cc09ab29db36f85e154d2c1ae9517f57", "b6b9c266f584191b6eb2d2659948a7a9"]
}
}],
multi: true
})
but not work, my doc as follows
{
"_id": ObjectId("6368f03f21b1ad246c84d982"),
"event_key": "PLAA7-194710",
"data_source": "EAP",
"name": "EP33L-AA94710",
"production": "CP",
"meta":
[
{
"key": "auto_note",
"value":
[]
},
{
"key": "bags",
"value":
{
"cc09ab29db36f85e154d2c1ae9517f57":
{
"name": "PLAA63952_event_ftp_pcar_event_20221107-194709_0.bag",
"profile": "msquare-prediction-ro",
"md5": "cc09ab29db36f85e154d2c1ae9517f57",
"cdi_tags":
[
"FDI_V1_0_0",
"from_dpp",
"epl-no-sensor",
"with_f100",
"epl-fix_video",
"epl-fix_horizon"
]
},
"361f5160cca3c3dec90cbbf93e3d7ae3":
{
"name": "PLAA63952_event_ftp_pcar_event_20221107-194709_0.bag",
"profile": "msquare-prediction-ro",
"md5": "361f5160cca3c3dec90cbbf93e3d7ae3",
"cdi_tags":
[
"FDI_V1_0_0",
"china",
"ftp_epcar_rawdata",
"from_dpp",
"trigger_type:system"
]
}
}
}
]
}
thinks
how to batch update nested doc in mongo

MongoDB query to include count of most frequent values for multiple fields

Thank you in advance for any help!
I've a collection QR with schema similar to this:
var qrSchema = new Schema({
qrId: { type: String, index: true },
owner: { type: Schema.Types.ObjectId, ref: 'User' },
qrName: { type: String },
qrCategory: { type: String, index: true },
shortUrl: { type: String}}
})
And collection Datas similar to this:
var dataSchema = new Schema({
qrId: { type: String, index: true}
city: { type: String},
device: { type: String},
date: { type: Date, index:true},
})
The relation between QR and Datas is 1-to-many.
I've an aggregate like this:
Model.QR.aggregate([
{ $match: {
$and: [
{ owner: mongoose.Types.ObjectId(user._id) },
{
$expr: {
$cond: [
{ $in: [ category, [ null, "", "undefined" ]] },
true,
{ $eq: [ "$qrCategory", category ] }
]
}
}
]
}
},
{ $lookup:
{
"from": "datas",
"localField": "qrId",
"foreignField": "qrId",
"as": "data"
}
},
{
$project: {
_id: 0,
qrId: 1,
qrName: 1,
qrCategory: 1,
shortUrl: 1,
data: {
$filter: {
input: "$data",
as: "item",
cond: {
$and: [
{ $gte: [ "$$item.date", date.start ] },
{ $lte: [ "$$item.date", date.end ] }
] }
}
}
}
},
{
$group: {
_id: { "qrId": "$qrId", "qrName": "$qrName", "qrCategory": "$qrCategory", "shortUrl": "$shortUrl" },
data: {
$push: {
dataItems: "$data",
count: {
$size: { '$ifNull': ['$data', []] }
}
}
}
}
},
{
$sort: {
"data.count": -1
}
},
{
$limit: 10,
}]).exec((err, results) => { })
Which is returning something like:
[
{
"_id": {
"qrId": "0PRA",
"qrName": "Campaign 0PRA",
"qrCategory": "html",
"shortUrl": "http://someurl.com/0PRA"
},
"data": [
{
"dataItems": [
{
"_id": "6200f2a8c0cf7a1c49233c7f",
"qrId": "0PRA",
"device": "iOS",
"city": "Beijing",
},
{
"_id": "6200f2eac0cf7a1c49233c80",
"qrId": "0PRA",
"device": "AndroidOS",
"city": "Beijing",
},
{
"_id": "6200f3a4c0cf7a1c49233c81",
"qrId": "0PRA",
"device": "AndroidOS",
"city": "Beijing",
},
{
"_id": "6200f632c0cf7a1c49233c88",
"qrId": "0PRA",
"device": "AndroidOS",
"city": "Nanchang",
},
{
"_id": "6201b342c0cf7a1c49233caa",
"qrId": "0PRA",
"device": "iOS",
"city": "Taizhou",
}
],
"count": 5
}
]
},
{
"_id": {
"qrId": "NQ17",
"qrName": "Campaign NQ17",
"qrCategory": "menu",
"shortUrl": "http://someurl.com/NQ17"
},
"data": [
{
"dataItems": [
{
"_id": "6200f207c0cf7a1c49233c7a",
"qrId": "NQ17",
"device": "iOS",
"city": "Singapore"
},
{
"_id": "8200f207c1cf7a1c49233c7a",
"qrId": "NQ17",
"device": "iOS",
"city": "Singapore"
},
{
"_id": "6200ac5db44f23b9ec2b6040",
"qrId": "NQ17",
"device": "AndroidOS",
"city": "San Antonio"
}
],
"count": 3
}
]
}
]
I'm trying to include the most frequent device and city in the results after the count of dataItems, like this:
[
{
"_id": {
"qrId": "0PRA",
"qrName": "Campaign 0PRA",
"qrCategory": "html",
"shortUrl": "http://someurl.com/0PRA"
},
"data": [
{
"dataItems": [
{
"_id": "6200f2a8c0cf7a1c49233c7f",
"qrId": "0PRA",
"device": "iOS",
"city": "Beijing",
},
{
"_id": "6200f2eac0cf7a1c49233c80",
"qrId": "0PRA",
"device": "AndroidOS",
"city": "Beijing",
},
{
"_id": "6200f3a4c0cf7a1c49233c81",
"qrId": "0PRA",
"device": "AndroidOS",
"city": "Beijing",
},
{
"_id": "6200f632c0cf7a1c49233c88",
"qrId": "0PRA",
"device": "AndroidOS",
"city": "Nanchang",
},
{
"_id": "6201b342c0cf7a1c49233caa",
"qrId": "0PRA",
"device": "iOS",
"city": "Taizhou",
}
],
"count": 5,
"topDevice": "AndroidOS", // <---- trying to add this
"topLocation": "Beijing" // <---- trying to add this
}
]
},
{
"_id": {
"qrId": "NQ17",
"qrName": "Campaign NQ17",
"qrCategory": "menu",
"shortUrl": "http://someurl.com/NQ17"
},
"data": [
{
"dataItems": [
{
"_id": "6200f207c0cf7a1c49233c7a",
"qrId": "NQ17",
"device": "iOS",
"city": "Singapore"
},
{
"_id": "8200f207c1cf7a1c49233c7a",
"qrId": "NQ17",
"device": "iOS",
"city": "Singapore"
},
{
"_id": "6200ac5db44f23b9ec2b6040",
"qrId": "NQ17",
"device": "android",
"city": "San Antonio"
}
],
"count": 3,
"topDevice": "iOS", // <---- trying to add this
"topLocation": "Singapore" // <---- trying to add this
}
]
}
]
Is this possible?
Thank you very much in advance for any help or hints!
Method 1
Use $function will be way more easier. MongoDB version >= 4.4
Sort function in js
db.collection.aggregate([
{
"$set": {
"data": {
"$map": {
"input": "$data",
"as": "d",
"in": {
"count": "$$d.count",
"dataItems": "$$d.dataItems",
"topDevice": {
$function: {
body: "function(arr) {return arr.sort((a,b) =>arr.filter(v => v===a).length-arr.filter(v => v===b).length).pop() }",
args: [ "$$d.dataItems.device" ],
lang: "js"
}
},
"topLocation": {
$function: {
body: "function(arr) {return arr.sort((a,b) =>arr.filter(v => v===a).length-arr.filter(v => v===b).length).pop() }",
args: [ "$$d.dataItems.city" ],
lang: "js"
}
}
}
}
}
}
}
])
mongoplayground
Method 2
db.qr.aggregate([
{
"$match": {
owner: {
"$in": [
"1",
"2"
]
}
}
},
{
"$lookup": {
"from": "data",
"localField": "qrId",
"foreignField": "qrId",
"as": "data",
"pipeline": [
{
"$match": {
"$and": [
{
"date": {
"$gte": ISODate("2021-09-01T01:23:25.184Z")
}
},
{
"date": {
"$lte": ISODate("2021-09-02T11:23:25.184Z")
}
}
]
}
},
{
"$facet": {
"deviceGroup": [
{
"$group": {
"_id": "$device",
"sum": {
"$sum": 1
}
}
},
{
"$sort": {
sum: -1
}
},
{
"$limit": 1
}
],
"cityGroup": [
{
"$group": {
"_id": "$city",
"sum": {
"$sum": 1
}
}
},
{
"$sort": {
sum: -1
}
},
{
"$limit": 1
}
],
"all": []
}
}
]
}
},
{
"$set": {
"data": {
"$first": "$data.all"
},
"topDevice": {
"$first": {
"$first": "$data.deviceGroup._id"
}
},
"topLocation": {
"$first": {
"$first": "$data.cityGroup._id"
}
}
}
},
{
$group: {
_id: {
"qrId": "$qrId",
"qrName": "$qrName",
"qrCategory": "$qrCategory",
"shortUrl": "$shortUrl"
},
data: {
$push: {
dataItems: "$data",
topDevice: "$topDevice",
topLocation: "$topLocation",
count: {
$size: {
"$ifNull": [
"$data",
[]
]
}
}
}
}
}
}
])
mongoplayground
Query
add the match you need, i didn't understand what the match should do
lookup on qrId
filter to keep only the start<=dates<=end (replace the 1 and 100)
facet to group all-documents, the topDevice the topLocation
$set to bring those data out from the nested locations they are
count is added as the size of all-documents
*maybe i am missing something, but try it(first part i think its like YuTing answer)
Test code here
QR.aggregate(
[{"$lookup":
{"from":"Datas",
"localField":"qrId",
"foreignField":"qrId",
"pipeline":
[{"$match":{"$and":[{"date":{"$gte":1}}, {"date":{"$lte":100}}]}},
{"$facet":
{"dataItems":[],
"topDevice":
[{"$group":{"_id":"$device", "count":{"$sum":1}}},
{"$sort":{"count":-1}}, {"$limit":1}],
"topLocation":
[{"$group":{"_id":"$city", "count":{"$sum":1}}},
{"$sort":{"count":-1}}, {"$limit":1}]}}],
"as":"data"}},
{"$set":{"data":{"$arrayElemAt":["$data", 0]}}},
{"$set":
{"dataItems":"$data.dataItems",
"count":{"$size":"$data.dataItems"},
"topDevice":
{"$getField":
{"field":"_id", "input":{"$arrayElemAt":["$data.topDevice", 0]}}},
"topLocation":
{"$getField":
{"field":"_id",
"input":{"$arrayElemAt":["$data.topLocation", 0]}}},
"data":"$$REMOVE"}}])

data stored in map, need all values of all object in an array

We have data like
[{
"parameterId": "5f914ca2679bae721d38410b",
"average": 574998.153846154,
"count": 26.0,
"date": "2020-09-08T18:30:00.000Z",
"dataPerHour": {
"0": {
"min": 92570.0,
"max": 995170.0,
"avg": 578268.826086957,
"count": 23,
"date": "2020-09-04T19:07:41.000Z",
"values": [{
"paramValue": "100414",
"time": "2020-09-04T19:07:41.000Z"
},
{
"paramValue": "705811",
"time": "2020-09-04T19:08:41.000Z"
}
]
},
"1": {
"min": 92570.0,
"max": 995170.0,
"avg": 678268.826086957,
"count": 23,
"date": "2020-09-03T19:07:41.000Z",
"values": [{
"paramValue": "100414",
"time": "2020-09-03T19:07:41.000Z"
},
{
"paramValue": "705811",
"time": "2020-09-03T19:08:41.000Z"
}
]
}
}
}, {
"parameterId": "5f914ca2679bae721d38410b",
"average": 574998.153846154,
"count": 26.0,
"date": "2020-09-08T18:30:00.000Z",
"dataPerHour": {
"0": {
"min": 92570.0,
"max": 995170.0,
"avg": 778268.826086957,
"count": 23,
"date": "2020-09-04T19:07:41.000Z",
"values": [{
"paramValue": "100414",
"time": "2020-09-04T19:07:41.000Z"
},
{
"paramValue": "705811",
"time": "2020-09-04T19:08:41.000Z"
}
]
}
}
}]
We need output:
[
"2020-09-08T18:30:00" : "578268.826086957",
"2020-09-03T19:07:41" : "678268.826086957",
"2020-09-08T18:30:00" : "778268.826086957"
]
I need mongo query for this. I need data like key = date and value = avg of each data in dataPerHour.
Does this help?
Playground
out
[
{
"data": [
{
"2020-09-03T19:07:41": 678268.826086957,
"2020-09-04T19:07:41": 578268.826086957
},
{
"2020-09-04T19:07:41": 778268.826086957
}
]
}
]
pipe
db.collection.aggregate([
{
$project: {
data: {
"$arrayToObject": {
$map: {
input: {
"$objectToArray": "$dataPerHour"
},
as: "d",
in: {
$cond: [
"$$d",
[
{
"$dateToString": {
"date": {
$toDate: "$$d.v.date"
},
"format": "%Y-%m-%dT%H:%M:%S"
}
},
{
"$toDouble": "$$d.v.avg"
}
],
"$$d"
]
}
}
}
}
}
},
{
$group: {
_id: null,
data: {
$push: "$$ROOT.data"
}
}
},
{
$unset: "_id"
}
])

mongodb aggregate lookup with a query

I have collections with following values:
reports
{
"_id": { "$oid": "5f05e1d13e0f6637739e215b" },
"testReport": [
{
"name": "Calcium",
"value": "87",
"slug": "ca",
"details": {
"description": "description....",
"recommendation": "recommendation....",
"isNormal": false
}
},
{
"name": "Magnesium",
"value": "-98",
"slug": "mg",
"details": {
"description": "description....",
"recommendation": "recommendation....",
"isNormal": false
}
}
],
"patientName": "Patient Name",
"clinicName": "Clinic",
"gender": "Male",
"bloodGroup": "A",
"createdAt": { "$date": "2020-07-08T15:10:09.612Z" },
"updatedAt": { "$date": "2020-07-08T15:10:09.612Z" }
},
setups
{
"_id": { "$oid": "5efcba7503f4693d164e651d" },
"code": "Ca",
"codeLower": "ca",
"name": "Calcium",
"valueFrom": -75,
"valueTo": -51,
"treatmentDescription": "description...",
"isNormal": false,
"gender": "",
"recommendation": "recommendation...",
"createdAt": { "$date": "2020-07-01T16:31:50.205Z" },
"updatedAt": { "$date": "2020-07-01T16:31:50.205Z" }
},
{
"_id": { "$oid": "5efcba7503f4693d164e651e" }, // <=== should find this for Calcium
"code": "Ca",
"codeLower": "ca",
"name": "Calcium",
"valueFrom": 76,
"valueTo": 100,
"treatmentDescription": "description...",
"isNormal": false,
"gender": "",
"recommendation": "recommendation...",
"createdAt": { "$date": "2020-07-01T16:31:50.205Z" },
"updatedAt": { "$date": "2020-07-01T16:31:50.205Z" }
},
{
"_id": { "$oid": "5efcba7603f4693d164e65bb" }, // <=== should find this for Magnesium
"code": "Mg",
"codeLower": "mg",
"name": "Magnesium",
"valueFrom": -100,
"valueTo": -76,
"treatmentDescription": "description...",
"isNormal": false,
"gender": "",
"recommendation": "recommendation...",
"createdAt": { "$date": "2020-07-01T16:31:50.205Z" },
"updatedAt": { "$date": "2020-07-01T16:31:50.205Z" }
},
{
"_id": { "$oid": "5efcba7503f4693d164e6550" },
"code": "Mg",
"codeLower": "mg",
"name": "Magnesium",
"valueFrom": 76,
"valueTo": 100,
"treatmentDescription": "description...",
"isNormal": false,
"gender": "",
"recommendation": "recommendation...",
"createdAt": { "$date": "2020-07-01T16:31:50.205Z" },
"updatedAt": { "$date": "2020-07-01T16:31:50.205Z" }
}
I want to search the value from reports collection and check whether the value is in range from the setups collection and return the _id and add the returned _ids in setupIds field on reports collection.
I tried with the following aggregation framework:
db.reports.aggegrate([
{
'$match': {
'_id': new ObjectId('5f05e1d13e0f6637739e215b')
}
}, {
'$lookup': {
'from': 'setups',
'let': {
'testValue': '$testReport.value',
'testName': '$testReport.name'
},
'pipeline': [
{
'$match': {
'$expr': {
{
'$and': [
{
'$eq': [
'$name', '$$testName'
]
}, {
'$gte': [
'$valueTo', '$$testValue'
]
}, {
'$lte': [
'$valueFrom', '$$testValue'
]
}
]
}
}
}
}
],
'as': 'setupIds'
}
}
])
This query didn't find the expected results.
This is the updated reports collection I want:
{
"_id": { "$oid": "5f05e1d13e0f6637739e215b" },
"setupIds": [{ "$oid": "5efcba7503f4693d164e651e" }, { "$oid": "5efcba7603f4693d164e65bb" }], // <=== Here, array of the ObjectId (ref: "Setups")
"patientName": "Patient Name",
"clinicName": "Clinic",
"gender": "Male",
"bloodGroup": "A",
"createdAt": { "$date": "2020-07-08T15:10:09.612Z" },
"updatedAt": { "$date": "2020-07-08T15:10:09.612Z" }
},
You can try like following
[{
$match: {
_id: ObjectId('5f05e1d13e0f6637739e215b')
}
}, {
$unwind: {
path: "$testReport"
}
}, {
$lookup: {
from: 'setup',
'let': {
testValue: {
$toInt: '$testReport.value'
},
testName: '$testReport.name'
},
pipeline: [{
$match: {
$expr: {
$and: [{
"$eq": [
"$name",
"$$testName"
]
},
{
"$gte": [
"$valueTo",
"$$testValue"
]
},
{
"$lte": [
"$valueFrom",
"$$testValue"
]
}
]
}
}
}],
as: 'setupIds'
}
}, {
$group: {
_id: "$_id",
patientName: {
$first: "$patientName"
},
clinicName: {
$first: "$clinicName"
},
gender: {
$first: "$gender"
},
bloodGroup: {
$first: "$bloodGroup"
},
createdAt: {
$first: "$createdAt"
},
updatedAt: {
$first: "$updatedAt"
},
setupIds: {
$addToSet: "$setupIds._id"
}
}
}, {
$addFields: {
setupIds: {
$reduce: {
input: "$setupIds",
initialValue: [],
in: {
$setUnion: ["$$this", "$$value"]
}
}
}
}
}]
Working Mongo playground

Aggregate nested arrays

I have multiple documents, and I'm trying to aggregate all documents with companyId = xxx and return one array with all the statuses.
So it will look like this:
[
{
"status": "created",
"date": "2019-03-16T10:59:59.200Z"
},
{
"status": "completed",
"date": "2019-03-16T11:00:37.750Z"
},
{
"status": "created",
"date": "2019-03-16T10:59:59.200Z"
},
{
"status": "completed",
"date": "2019-03-16T11:00:37.750Z"
},
{
"status": "created",
"date": "2019-03-16T10:59:59.200Z"
},
{
"status": "completed",
"date": "2019-03-16T11:00:37.750Z"
},
{
"status": "created",
"date": "2019-03-16T10:59:59.200Z"
},
{
"status": "completed",
"date": "2019-03-16T11:00:37.750Z"
}
]
The document look like this:
[
{
"companyId": "xxx",
"position": "",
"section": "",
"comment": "",
"items": [
{
"any": "111",
"name": "some name",
"description": "some description",
"version": "3",
"status": [
{
"status": "created",
"date": "2019-03-16T10:59:59.200Z"
},
{
"status": "completed",
"date": "2019-03-16T11:00:37.750Z"
}
]
},
{
"any": "222",
"name": "some name",
"description": "some description",
"version": "3",
"status": [
{
"status": "created",
"date": "2019-03-16T10:59:59.200Z"
},
{
"status": "completed",
"date": "2019-03-16T11:00:37.750Z"
}
]
}
]
},
{
"companyId": "xxx",
"position": "",
"section": "",
"comment": "",
"items": [
{
"any": "111",
"name": "some name",
"description": "some description",
"version": "3",
"status": [
{
"status": "created",
"date": "2019-03-16T10:59:59.200Z"
},
{
"status": "completed",
"date": "2019-03-16T11:00:37.750Z"
}
]
},
{
"any": "222",
"name": "some name",
"description": "some description",
"version": "3",
"status": [
{
"status": "created",
"date": "2019-03-16T10:59:59.200Z"
},
{
"status": "completed",
"date": "2019-03-16T11:00:37.750Z"
}
]
}
]
}
]
Any suggestion, how to implement this?
Then I want to loop over the array (in code) and count how many items in status created, and completed. maybe it could be done with the query?
Thanks in advance
You can use below aggregation:
db.col.aggregate([
{
$match: { companyId: "xxx" }
},
{
$unwind: "$items"
},
{
$unwind: "$items.status"
},
{
$replaceRoot: {
newRoot: "$items.status"
}
},
{
$group: {
_id: "$status",
count: { $sum: 1 }
}
}
])
Double $unwind will return single status per document and then you can use $replaceRoot to promote each status to root level of your document.
Additionally you can add $group stage to count documents by status.
In addition to the #mickl answer, you can add $project pipeline to get the result as a flat list of status and count.
db.collectionName.aggregate([
{
$match: { companyId: "xxx" }
},
{
$unwind: "$items"
},
{
$unwind: "$items.status"
},
{
$replaceRoot: {
newRoot: "$items.status"
}
},
{
$group: {
_id: "$status",
count: { $sum: 1 }
}
},
{
$project: {
"status":"$_id",
"count":1,
_id:0
}
}
])
If the number of documents on which you are executing the above query is too much then you should avoid using $unwind in the initial stage of aggregation pipeline.
Either you should use $project after $match to reduce the selection of fields or you can use below query:
db.col.aggregate([
{
$match: {
companyId: "xxx"
}
},
{
$project: {
_id: 0,
data: {
$reduce: {
input: "$items.status",
initialValue: [
],
in: {
$concatArrays: [
"$$this",
"$$value"
]
}
}
}
}
},
{
$unwind: "$data"
},
{
$replaceRoot: {
newRoot: "$data"
}
}
])