MongoDB filtering out subdocuments with lookup aggregation - mongodb

Our project database has a capped collection called values which gets updated every few minutes with new data from sensors. These sensors all belong to a single sensor node, and I would like to query the last data from these nodes in a single aggregation. The problem I am having is filtering out just the last of ALL the types of sensors while still having only one (efficient) query. I looked around and found the $group argument, but I can't seem to figure out how to use it correctly in this case.
The database is structured as follows:
nodes:
{
"_id": 681
"sensors": [
{
"type": "foo"
},
{
"type": "bar"
}
]
}
values:
{
"_id" : ObjectId("570cc8b6ac55850d5740784e"),
"timestamp" : ISODate("2016-04-12T12:06:46.344Z"),
"type" : "foo",
"nodeid" : 681,
"value" : 10
}
{
"_id" : ObjectId("190ac8b6ac55850d5740776e"),
"timestamp" : ISODate("2016-04-12T12:06:46.344Z"),
"type" : "bar",
"nodeid" : 681,
"value" : 20
}
{
"_id" : ObjectId("167bc997bb66750d5740665e"),
"timestamp" : ISODate("2016-04-12T12:06:46.344Z"),
"type" : "bar",
"nodeid" : 200,
"value" : 20
}
{
"_id" : ObjectId("110cc9c6ac55850d5740784e"),
"timestamp" : ISODate("2016-04-09T12:06:46.344Z"),
"type" : "foo",
"nodeid" : 681,
"value" : 12
}
so let's imagine I want the data from node 681, I would want a structure like this:
nodes:
{
"_id": 681
"sensors": [
{
"_id" : ObjectId("570cc8b6ac55850d5740784e"),
"timestamp" : ISODate("2016-04-12T12:06:46.344Z"),
"type" : "foo",
"nodeid" : 681,
"value" : 10
},
{
"_id" : ObjectId("190ac8b6ac55850d5740776e"),
"timestamp" : ISODate("2016-04-12T12:06:46.344Z"),
"type" : "bar",
"nodeid" : 681,
"value" : 20
}
]
}
Notice how one value of foo is not queried, because I want to only get the latest value possible if there are more than one value (which is always going to be the case). The ordering of the collection is already according to the timestamp because the collection is capped.
I have this query, but it just gets all the values from the database (which is waaay too much to do in a lifetime, let alone one request of the web app), so I was wondering how I would filter it before it gets aggregated.
query:
db.nodes.aggregate(
[
{
$unwind: "$sensors"
},
{
$match:{
nodeid: 681
}
},
{
$lookup:{
from: "values", localField: "sensors.type", foreignField: "type", as: "sensors"
}
}
}
]
)

Try this
// Pipeline
[
// Stage 1 - sort the data collection if not already done (optional)
{
$sort: {
"timestamp":1
}
},
// Stage 2 - group by type & nodeid then get first item found in each group
{
$group: {
"_id":{type:"$type",nodeid:"$nodeid"},
"sensors": {"$first":"$$CURRENT"} //consider using $last if your collection is on reverse
}
},
// Stage 3 - project the fields in desired
{
$project: {
"_id":"$sensors._id",
"timestamp":"$sensors.timestamp",
"type":"$sensors.type",
"nodeid":"$sensors.nodeid",
"value":"$sensors.value"
}
},
// Stage 4 - group and push it to array sensors
{
$group: {
"_id":{nodeid:"$nodeid"},
"sensors": {"$addToSet":"$$CURRENT"}
}
}
]

as far as I got document structure, there is no need to use $lookup as all data is in readings(values) collection.
Please see proposed solution:
db.readings.aggregate([{
$match : {
nodeid : 681
}
},
{
$group : {
_id : {
type : "$type",
nodeid : "$nodeid"
},
readings : {
$push : {
timestamp : "$timestamp",
value : "$value",
id : "$_id"
}
}
}
}, {
$project : {
_id : "$_id",
readings : {
$slice : ["$readings", -1]
}
}
}, {
$unwind : "$readings"
}, {
$project : {
_id : "$readings.id",
type : "$_id.type",
nodeid : "$_id.nodeid",
timestamp : "$readings.timestamp",
value : "$readings.value",
}
}, {
$group : {
_id : "$nodeid",
sensors : {
$push : {
_id : "$_id",
timestamp : "$timestamp",
value : "$value",
type:"$type"
}
}
}
}
])
and output:
{
"_id" : 681,
"sensors" : [
{
"_id" : ObjectId("110cc9c6ac55850d5740784e"),
"timestamp" : ISODate("2016-04-09T12:06:46.344Z"),
"value" : 12,
"type" : "foo"
},
{
"_id" : ObjectId("190ac8b6ac55850d5740776e"),
"timestamp" : ISODate("2016-04-12T12:06:46.344Z"),
"value" : 20,
"type" : "bar"
}
]
}
Any comments welcome!

Related

Mongodb Query to get the nth document

I need to create a query in mongodb that needs to return the SECOND TO THE LAST document. I am planning to use $group for this query but i dont know what aggregation function to use. I only know $first and $last.
I have an example collection below and also include the expected output. Thank you!
"_id" : ObjectId("60dc27ac54b7c46bfa1b84b4"),
"auditlogs" : [
{
"_id" : ObjectId("60dc27ac54b7c46bfa1b84be"),
"userid" : ObjectId("5ffe702d59a9205db81fcb69"),
"action" : "ADDTRANSACTION"
},
{
"_id" : ObjectId("60dc27ac54b7c46bfa1b84bd"),
"userid" : ObjectId("5ffe644f9493e05db9245192"),
"action" : "EDITPROFILE"
},
{
"_id" : ObjectId("60dc27ac54b7c46bfa1b84bc"),
"userid" : ObjectId("5ffe64949493e05db9245197"),
"action" : "DELETETRANSACTION"
} ]
"_id" : ObjectId("60dc27ac54b7c46bfa1b75ge2"),
"auditlogs" : [
{
"_id" : ObjectId("60dc27ac54b7c46bfa1b84bb"),
"userid" : ObjectId("5ffe64b69493e05db924519b"),
"action" : "ADDTRANSACTION"
},
{
"_id" : ObjectId("60dc27ac54b7c46bfa1b84ba"),
"userid" : ObjectId("5ffe65419493e05db92451d4"),
"action" : "ADDTRANSACTION"
},
{
"_id" : ObjectId("60dc27ac54b7c46bfa1b84b9"),
"userid" : ObjectId("5ffe65689493e05db92451d9"),
"action" : "CHANGEACCESS"
},
{
"_id" : ObjectId("60dc27ac54b7c46bfa1b84b8"),
"userid" : ObjectId("5ffe65819493e05db92451dd"),
"action" : "DELETETRANSACTION"
},
{
"_id" : ObjectId("60dc27ac54b7c46bfa1b84b7"),
"userid" : ObjectId("5ffe65df9493e05db92451f3"),
"action" : "EDITPROFILE",
]
OUTPUT:
{"_id" : ObjectId("60dc27ac54b7c46bfa1b84b4"),"_id" : ObjectId("60dc27ac54b7c46bfa1b84bd"),"userid" : ObjectId("5ffe644f9493e05db9245192"),"action" : "EDITPROFILE"},
{"_id" : ObjectId("60dc27ac54b7c46bfa1b75ge2"),"_id" : ObjectId("60dc27ac54b7c46bfa1b84b8"),"userid" : ObjectId("5ffe65819493e05db92451dd"),"action" : "DELETETRANSACTION"}
You can't have two _id keys in one single object.
I've made the parent object's id to _parentId you can give it's a name anything you want except _id
Aggregation:
db.collection.aggregate([
{
$unwind: "$auditlogs"
},
{
"$project": {
"_parentId": "$_id",
"_id": "$auditlogs._id",
"action": "$auditlogs.action",
"userid": "$auditlogs.userid",
}
}
])
Playground
You can slice the array by -2 to get the last two item, then by 1 to get first one. Therefore, the array will be left the second to the last. Finally, unwind auditlogs so it can be changed from array to object which is structure that you want.
db.collection.aggregate([
{
$project: { auditlogs : { $slice: [ "$auditlogs", -2 ] } }
},
{
$project: { auditlogs : { $slice: [ "$auditlogs", 1 ] } }
},
{
$unwind: "$auditlogs"
}
])

mongodb aggregation $max and corresponding timestamp

I'm being challenged by the $group $max in an aggregation with MongoDB on Nodes Express app. Here is the a sample of the collection;
{"_id":"5b7e78cf022be03c35776bec",
"humidity":60,
"pressure":1014.18,
"temperature":26.8,
"light":2464,
"timestampiso":"2018-08-23T09:05:19.112Z",
"timestamp":1535015119112
},
{
"_id":"5b7e7892022be03c35776bea",
"humidity":60.4,
"pressure":1014.14,
"temperature":26.7,
"light":2422,
"timestampiso":"2018-08-23T09:04:18.115Z",
"timestamp":1535015058115
},
{
"_id":"5b7e7855022be03c35776be8",
"humidity":60.6,
"pressure":1014.2,
"temperature":26.6,
"light":2409,
"timestampiso":"2018-08-23T09:03:17.113Z",
"timestamp":1535014997113
}]
What I'm trying to do is to query the collection, by first retrieving the entries of the last hour based on the timestamp and then looking for highest pressure of the sample (should be 60 entries as there is one entry per minute)
What I can de is find this value. What I'm stuggling on to have the timestamp related to that max value.
When I run
db.collection("ArduinoSensorMkr1000").
aggregate([{ "$match" : {"timestamp" : {"$gte" : (Date.now()-60*60*1000)}}},
{ "$group" : {"_id" : null, maxpressure : {"$max" : "$pressure"}
}
},
{
"$project" : { "_id" : 0 }
}
])
Fine, the output is correct and I get the maxpressure as so
[{"maxpressure":1014.87}]
but what I'm trying to output is the maxpressure field but with it, its corresponding timestamp. The output should look as so
[{"maxpressure":1014.87,"timestamp":1535015058115}]
Any hints on how I get this timestamp value to show?
Thank you for your support
You can try this first need to sort your data using $sort and you can pick max value by using $first
QUERY
db.col.aggregate([
{ "$match": { "timestamp": { "$gte": (Date.now() - 60 * 60 * 1000) } } },
{ "$sort": { "pressure": -1 } },
{
"$group": {
"_id": null, "maxpressure": { "$first": "$pressure" },
"timestamp": { "$first": "$timestamp" }
}
},
{
"$project": { "_id": 0 }
}
])
DATA
[{
"_id" : "5b7e78cf022be03c35776bec",
"humidity" : 60.0,
"pressure" : 1014.18,
"temperature" : 26.8,
"light" : 2464.0,
"timestampiso" : "2018-08-23T09:05:19.112Z",
"timestamp" : 1535015119112.0
},
{
"_id" : "5b7e7892022be03c35776bea",
"humidity" : 60.4,
"pressure" : 1014.14,
"temperature" : 26.7,
"light" : 2422.0,
"timestampiso" : "2018-08-23T09:04:18.115Z",
"timestamp" : 1535015058115.0
},
{
"_id" : "5b7e7855022be03c35776be8",
"humidity" : 60.6,
"pressure" : 1014.2,
"temperature" : 26.6,
"light" : 2409.0,
"timestampiso" : "2018-08-23T09:03:17.113Z",
"timestamp" : 1535014997113.0
}]
THE OUTPUT
{
"maxpressure" : 1014.87,
"timestamp" : 1535015058115.0
}
My suggestion is to use sort/limit instead of grouping. By this way you can get entire document before project only interesting fields :
db['ArduinoSensorMkr1000'].aggregate(
[{ "$match" : {"timestamp" : {"$gte" : (Date.now()-5*60*60*1000)}}},
{$sort:{pressure:-1}},
{$limit:1},{
"$project" : { "_id" : 0,"timestamp":1,"pressure":1 }}
])

How can I do match after second level unwind in mongodb?

I am working on a software that uses MongoDB as a database. I have a collection like this (this is just one document)
{
"_id" : ObjectId("5aef51e0af42ea1b70d0c4dc"),
"EndpointId" : "89799bcc-e86f-4c8a-b340-8b5ed53caf83",
"DateTime" : ISODate("2018-05-06T19:05:04.574Z"),
"Url" : "test",
"Tags" : [
{
"Uid" : "E2:02:00:18:DA:40",
"Type" : 1,
"DateTime" : ISODate("2018-05-06T19:05:04.574Z"),
"Sensors" : [
{
"Type" : 1,
"Value" : NumberDecimal("-98")
},
{
"Type" : 2,
"Value" : NumberDecimal("-65")
}
]
},
{
"Uid" : "12:3B:6A:1A:B7:F9",
"Type" : 1,
"DateTime" : ISODate("2018-05-06T19:05:04.574Z"),
"Sensors" : [
{
"Type" : 1,
"Value" : NumberDecimal("-95")
},
{
"Type" : 2,
"Value" : NumberDecimal("-59")
},
{
"Type" : 3,
"Value" : NumberDecimal("12.939770381907275")
}
]
}
]
}
and I want to run this query on it.
db.myCollection.aggregate([
{ $unwind: "$Tags" },
{
$match: {
$and: [
{
"Tags.DateTime": {
$gte: ISODate("2018-05-06T19:05:02Z"),
$lte: ISODate("2018-05-06T19:05:09Z"),
},
},
{ "Tags.Uid": { $in: ["C1:3D:CA:D4:45:11"] } },
],
},
},
{ $unwind: "$Tags.Sensors" },
{ $match: { "$Tags.Sensors.Type": { $in: [1, 2] } } },
{
$project: {
_id: 0,
EndpointId: "$EndpointId",
TagId: "$Tags.Uid",
Url: "$Url",
TagType: "$Tags.Type",
Date: "$Tags.DateTime",
SensorType: "$Tags.Sensors.Type",
Value: "$Tags.Sensors.Value",
},
},
])
the problem is, the second match (that checks $Tags.Sensors.Type) doesn't work and doesn't affect the result of the query.
How can I solve that?
If this is not the right way, what is the right way to run these conditions?
The $match stage accepts field names without a leading $ sign. You've done that correctly in your first $match stage but in the second one you write $Tags.Sensors.Type. Simply removing the leading $ sign should make your query work.
Mind you, the whole thing can be a bit simplified (and some beautification doesn't hurt, either):
You don't need to use $and in your example since it's assumed by default if you specify more than one criterion in a filter.
The $in that you use for the Tags.Sensors.Type filter can be a simple : kind of equality operator unless you have more than one element in the list of acceptable values.
In the $project stage, instead of (kind of) duplicating identical field names you can use the <field>: 1 syntax unless the order of the fields matters.
So the final query would be something like this.
db.myCollection.aggregate([
{
"$unwind" : "$Tags"
},
{
"$match" : {
"Tags.DateTime" : { "$gte" : ISODate("2018-05-06T19:05:02Z"), "$lte" : ISODate("2018-05-06T19:05:09Z") },
"Tags.Uid" : { "$in" : ["C1:3D:CA:D4:45:11"] }
}
}, {
"$unwind" : "$Tags.Sensors"
}, {
"$match" : {
"Tags.Sensors.Type" : { "$in" : [1,2] }
}
},
{
"$project" : {
"_id" : 0,
"EndpointId" : 1,
"TagId" : "$Tags.Uid",
"Url" : 1,
"TagType" : "$Tags.Type",
"Date" : "$Tags.DateTime",
"SensorType" : "$Tags.Sensors.Type",
"Value" : "$Tags.Sensors.Value"
}
}])

How to filter and aggregate data from one collection into another format using MongoDB

I have one collection named 'ctrlcharts'.
e.g.
{
"_id" : ObjectId("57fc695492af567031246736"),
"deviceId" : "A001",
"sensorId" : "S003",
"time" : "2016/10/11 12:23:50",
"charts" : [
{
"sensor" : "ch_11",
"value" : 120
},
{
"sensor" : "ch_12",
"value" : 150
}
]
}
How to filter "sensor" : "ch_11" and aggregate data from one collection into another format using MongoDB
e.g.
{
"time" : "2016/10/11 12:23:50",
"sensor" : "ch_12",
"value" : 150
}
I tried below code
db.ctrlcharts.aggregate([
{ $match: {"deviceId" : "A001", "sensorId" : "S003", "time" : "2016/10/11 12:23:50"}},
{ $project: {
_id: 0,
time : 1 ,
sensor : "$charts.sensor"
value : "$charts.value"
}
}
])
But I got the result as
{
"time" : "2016/10/11 12:23:50",
"sensor" : ["ch_11","ch_12"],
"value" : [120,150]
}
Thanks
You tried best....just use $unwind
db.ctrlcharts.aggregate(
{$unwind:"$charts"},
{$match: {"deviceId" :"A001", "charts.sensor":"ch_12", "time" : "2016/10/11 12:23:50"}},
{$project:{_id:0,time:1, sensor : "$charts.sensor", value :"$charts.value"}}).pretty()
You can use $unwind (aggregation) to separate charts array.
db.ctrlcharts.aggregate( [ { $unwind : "$charts" } ] )
This will produce result like -
{ "_id" : ObjectId("57ff397a007c43ecacf10512"), "deviceId" : "A001", "sensorId" : "S003", "time" : "2016/10/11 12:23:50", "charts" : { "sensor" : "ch_11", "value" : 120 } }
{ "_id" : ObjectId("57ff397a007c43ecacf10512"), "deviceId" : "A001", "sensorId" : "S003", "time" : "2016/10/11 12:23:50", "charts" : { "sensor" : "ch_12", "value" : 150 } }
and then use your match query
Use the $arrayElemAt and $filter operators to query the array more efficiently without the need to $unwind. The reason why $unwind is not as efficient is that it produces a cartesian product of the documents i.e. a copy of each document per array entry, which uses more memory (possible memory cap on aggregation pipelines of 10% total memory) and therefore takes time to produce as well processing the documents during the flattening process.
The $filter will return a subset of the array that only contains the elements that match the filter condition. The $arrayElemAt operator then returns the element from the filtered array at the specified array index to give you the subdocument you need.
A further $project is necessary to flatten the fields to give you the desired result:
db.ctrlcharts.aggregate([
{ "$match": {
"deviceId": "A001",
"sensorId": "S003",
"time": "2016/10/11 12:23:50",
"charts.sensor": "ch_11"
} },
{
"$project": {
"time": 1,
"chart": {
"$arrayElemAt": [
"$filter": {
"input": "$charts",
"as": "item",
"cond": { "$eq": ["$$item.sensor", "ch_11"] }
}, 0
]
}
}
},
{
"$project": {
"_id": 0,
"time": 1,
"sensor": "$chart.sensor"
"value": "$chart.value"
}
}
])

MongoDb aggregation framework value of a field where max another field

I have a collection that has records looking like this:
"_id" : ObjectId("550424ef2f44472856286d56"), "accountId" : "123",
"contactOperations" :
[
{ "contactId" : "1", "operation" : 1, "date" : 500 },
{ "contactId" : "1", "operation" : 2, "date" : 501 },
{ "contactId" : "2", "operation" : 1, "date" : 502 }
]
}
I want to know the latest operation number that has been applied on a certain contact.
I'm using the aggregation framework to first unwind the contactOperations and then grouping by accountId and contactOperations.contactId and max contactOperations.date.
aggregate([{$unwind : "$contactOperations"}, {$group : {"_id":{"accountId":"$accountId", "contactId":"$contactOperations.contactId"}, "date":{$max:"$contactOperations.date"} }}])
The result I get is:
"_id" : { "accountId" : "123", "contactId" : "2" }, "time" : 502 }
"_id" : { "accountId" : "123", "contactId" : "1" }, "time" : 501 }
Which seems correct so far, but I also need the contactOperations.operation field that was recorded with $max date. How can I select that?
You have to sort the unwind values then apply $last operator to get operation for max date. Hope this query will solve your problem.
aggregate([
{
$unwind: "$contactOperations"
},
{
$sort: {
"date": 1
}
},
{
$group: {
"_id": {
"accountId": "$accountId",
"contactId": "$contactOperations.contactId"
},
"date": {
$max: "$contactOperations.date"
},
"operationId": {
$last: "$contactOperations.operation"
}
}
}
])