MongoDB how to select only specific sub_elements for a document? - mongodb

In MongoDB I have documents like this:
{
"_id" : ObjectId("5cc9f3c87aa1024e079a3abf"),
"created_at" : ISODate("2019-04-01T00:00:00.000Z"),
"demographics" : [
{
"key" : "gender",
"value" : "male"
},
{
"key" : "birth_year",
"value" : 1992
},
{
"key" : "city_or_rural",
"value" : "rural"
},
{
"key" : "car_purchase_intention",
"value" : "no"
},
{
"key" : "education_level",
"value" : "high"
},
{
"key" : "age",
"value" : 26
}
]
}
I would like to query by "created_at" but, for each document in the result, return only the demographics elements whose key is "age" or "gender".
I have tried combinations of $unwind and $project, but I cannot get a proper result.
I am expecting results like this:
# 1
{
"_id" : ObjectId("5cc9f3c87aa1024e079a3abf"),
"created_at" : ISODate("2019-04-01T00:00:00.000Z"),
"demographics" : [
{
"key" : "gender",
"value" : "male"
},
{
"key" : "age",
"value" : 26
}
]
}
# 2
{
"_id" : ObjectId("5cc9f3c87aa1024e079axxx"),
"created_at" : ISODate("2019-04-01T00:00:00.000Z"),
"demographics" : [
{
"key" : "gender",
"value" : "female"
},
{
"key" : "age",
"value" : 56
}
]
}

You can use the $filter aggregation operator after you match on the created_at field.
$filter selects a subset of an array to return based on the specified condition and returns an array with only those elements that match the condition.
db.collection.aggregate([
{
$match: {
created_at: ISODate("2019-04-01T00:00:00.000Z")
}
},
{
$project: {
created_at: "$created_at",
demographics: {
$filter: {
input: "$demographics",
as: "item",
cond: {
$in: [
"$$item.key",
[
"gender",
"age"
]
]
}
}
}
}
}
])
Playground

Related

Mongoose, filter subdocument array by date

[{
"_id" : ObjectId("5f3d0f13fd6fd6667f8f56d6"),
"name" : "A",
"prices" : [
{
"_id" : ObjectId("5f3d0f16fd6fd6667f8f57fb"),
"d" : ISODate("2019-08-19T00:00:00.000Z"),
"h" : 182.1,
},
{
"_id" : ObjectId("5f3d0f16fd6fd6667f8f57fc"),
"d" : ISODate("2019-08-20T00:00:00.000Z"),
"h" : 182.1,
},
{
"_id" : ObjectId("5f3d0f16fd6fd6667f81f57fc"),
"d" : ISODate("2019-08-21T00:00:00.000Z"),
"h" : 182.1,
}
]
}]
Input:
from: '2019-08-20'
to: '2019-08-21'
Expected output:
[{
"_id" : ObjectId("5f3d0f13fd6fd6667f8f56d6"),
"name" : "A",
"prices" : [
{
"_id" : ObjectId("5f3d0f16fd6fd6667f8f57fc"),
"d" : ISODate("2019-08-20T00:00:00.000Z"),
"h" : 182.1,
},
{
"_id" : ObjectId("5f3d0f16fd6fd6667f81f57fc"),
"d" : ISODate("2019-08-21T00:00:00.000Z"),
"h" : 182.1,
}
]
}]
So I want to filter the prices array so that it only returns items within the given date range, based on the field d.
So some form of aggregation is needed:
mongoose.model("stock").aggregate(...)
Some combination of $unwind, $filter, $gte, $lte
You can do it as shown below:
db.collection.aggregate([
{
$project: {
items: {
$filter: {
input: "$prices",
as: "price",
cond: {
"$and": [//only date conditions
{
$gte: [
"$$price.d",
new Date("2019-08-20")
]
},
{
$lte: [
"$$price.d",
new Date("2019-08-21")
]
}
]
}
}
}
}
}
])
play

mongodb find the document by id and then group the result based on name field

I have a collection with multiple documents like
{
"_id" : ObjectId("5a64d076bfd103df081967ae"),
"status" : "",
"Number" : 53,
"values" : [
{
"date" : "2015-05-18",
"value" : 12.41
},
{
"date" : "2015-05-19",
"value" : 12.45
},
],
"Name" : "ABC Banking",
"scheme":"ABC1",
"createdDate" : "21-01-2018"
}
{
"_id" : ObjectId("5a64d076bfd103df081967ae"),
"status" : "",
"Number" : 53,
"values" : [
{
"date" : "2015-05-18",
"value" : 13.41
},
{
"date" : "2015-05-19",
"value" : 13.45
},
],
"Name" : "ABC Banking",
"scheme":"ABC2",
"createdDate" : "21-01-2018"
}
I am querying the collection based on the Number field like this:
db.getCollection('mfhistories').find({'Number':53})
to get all the documents with this Number.
Now I want to group all the documents with Name 'ABC Banking' into an array, so that I will get the result grouped by Name.
So the result should look like this:
{
"Name":"ABC Banking",
[
{
"_id" : ObjectId("5a64d076bfd103df081967ae"),
"status" : "",
"Number" : 53,
"values" : [
{
"date" : "2015-05-18",
"value" : 13.41
},
{
"date" : "2015-05-19",
"value" : 13.45
},
],
"scheme":"ABC1",
"createdDate" : "21-01-2018"
},
{
"_id" : ObjectId("5a64d076bfd103df081967ae"),
"status" : "",
"Number" : 53,
"values" : [
{
"date" : "2015-05-18",
"value" : 13.41
},
{
"date" : "2015-05-19",
"value" : 13.45
}
],
"scheme":"ABC2",
"createdDate" : "21-01-2018"
}
]
}
Please help..
Thanks,
J
You can use Aggregation Framework for that:
db.col.aggregate([
{
$match: { Number: 53, Name: "ABC Banking" }
},
{
$group: {
_id: "$Name",
docs: { $push: "$$ROOT" }
}
},
{
$project: {
Name: "$_id",
_id: 0,
docs: 1
}
}
])
$$ROOT is a special variable which captures the entire document. More here.
db.mfhistories.aggregate(
// Pipeline
[
// Stage 1
{
$match: {
Number: 53
}
},
// Stage 2
{
$group: {
_id: {
Name: '$Name'
},
docObj: {
$addToSet: '$$CURRENT'
}
}
},
// Stage 3
{
$project: {
Name: '$_id.Name',
docObj: 1,
_id: 0
}
}
]
);

MongoDB filtering out subdocuments with lookup aggregation

Our project database has a capped collection called values which gets updated every few minutes with new data from sensors. These sensors all belong to a single sensor node, and I would like to query the last data from these nodes in a single aggregation. The problem I am having is filtering out just the last of ALL the types of sensors while still having only one (efficient) query. I looked around and found the $group argument, but I can't seem to figure out how to use it correctly in this case.
The database is structured as follows:
nodes:
{
"_id": 681
"sensors": [
{
"type": "foo"
},
{
"type": "bar"
}
]
}
values:
{
"_id" : ObjectId("570cc8b6ac55850d5740784e"),
"timestamp" : ISODate("2016-04-12T12:06:46.344Z"),
"type" : "foo",
"nodeid" : 681,
"value" : 10
}
{
"_id" : ObjectId("190ac8b6ac55850d5740776e"),
"timestamp" : ISODate("2016-04-12T12:06:46.344Z"),
"type" : "bar",
"nodeid" : 681,
"value" : 20
}
{
"_id" : ObjectId("167bc997bb66750d5740665e"),
"timestamp" : ISODate("2016-04-12T12:06:46.344Z"),
"type" : "bar",
"nodeid" : 200,
"value" : 20
}
{
"_id" : ObjectId("110cc9c6ac55850d5740784e"),
"timestamp" : ISODate("2016-04-09T12:06:46.344Z"),
"type" : "foo",
"nodeid" : 681,
"value" : 12
}
So, let's imagine I want the data from node 681; I would want a structure like this:
nodes:
{
"_id": 681
"sensors": [
{
"_id" : ObjectId("570cc8b6ac55850d5740784e"),
"timestamp" : ISODate("2016-04-12T12:06:46.344Z"),
"type" : "foo",
"nodeid" : 681,
"value" : 10
},
{
"_id" : ObjectId("190ac8b6ac55850d5740776e"),
"timestamp" : ISODate("2016-04-12T12:06:46.344Z"),
"type" : "bar",
"nodeid" : 681,
"value" : 20
}
]
}
Notice how one value of foo is not returned, because I only want to get the latest value if there is more than one value (which is always going to be the case). The ordering of the collection is already according to the timestamp because the collection is capped.
I have this query, but it just gets all the values from the database (which is waaay too much to do in a lifetime, let alone one request of the web app), so I was wondering how I would filter it before it gets aggregated.
query:
db.nodes.aggregate(
[
{
$unwind: "$sensors"
},
{
$match:{
nodeid: 681
}
},
{
$lookup:{
from: "values", localField: "sensors.type", foreignField: "type", as: "sensors"
}
}
}
]
)
Try this
// Pipeline
[
// Stage 1 - sort the data collection if not already done (optional)
{
$sort: {
"timestamp":1
}
},
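// Stage 2 - group by type & nodeid, then take the first item in each group (with the ascending sort above this is the OLDEST reading; use $last, or sort descending, to get the latest one)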
{
$group: {
"_id":{type:"$type",nodeid:"$nodeid"},
"sensors": {"$first":"$$CURRENT"} //consider using $last if your collection is on reverse
}
},
// Stage 3 - project the fields in desired
{
$project: {
"_id":"$sensors._id",
"timestamp":"$sensors.timestamp",
"type":"$sensors.type",
"nodeid":"$sensors.nodeid",
"value":"$sensors.value"
}
},
// Stage 4 - group and push it to array sensors
{
$group: {
"_id":{nodeid:"$nodeid"},
"sensors": {"$addToSet":"$$CURRENT"}
}
}
]
As far as I understand the document structure, there is no need to use $lookup, as all the data is in the readings (values) collection.
Please see proposed solution:
db.readings.aggregate([{
$match : {
nodeid : 681
}
},
{
$group : {
_id : {
type : "$type",
nodeid : "$nodeid"
},
readings : {
$push : {
timestamp : "$timestamp",
value : "$value",
id : "$_id"
}
}
}
}, {
$project : {
_id : "$_id",
readings : {
$slice : ["$readings", -1]
}
}
}, {
$unwind : "$readings"
}, {
$project : {
_id : "$readings.id",
type : "$_id.type",
nodeid : "$_id.nodeid",
timestamp : "$readings.timestamp",
value : "$readings.value",
}
}, {
$group : {
_id : "$nodeid",
sensors : {
$push : {
_id : "$_id",
timestamp : "$timestamp",
value : "$value",
type:"$type"
}
}
}
}
])
and output:
{
"_id" : 681,
"sensors" : [
{
"_id" : ObjectId("110cc9c6ac55850d5740784e"),
"timestamp" : ISODate("2016-04-09T12:06:46.344Z"),
"value" : 12,
"type" : "foo"
},
{
"_id" : ObjectId("190ac8b6ac55850d5740776e"),
"timestamp" : ISODate("2016-04-12T12:06:46.344Z"),
"value" : 20,
"type" : "bar"
}
]
}
Any comments welcome!

Find duplicate key in embedded sub document in mongodb

I am trying to craft a query that will allow me to find duplicate keys in subdocument in MongoDB.
It needs to be able to query any number of documents and see what keys are duplicated across them in a subdocument. The key of my subdocument is called attributes and I need to be able to target a particular query of documents and pull out duplicate attribute keys that they all share.
EDIT:
I forgot to mention that I do not know the names of the attributes ahead of time. I need to be able to essentially select distinct attributes that they share and aggregate the values.
Collection Sample:
[
{
sku: '123',
attributes: {
size: 'L',
custom: 7
}
},
{
sku: '456',
attributes: {
size: 'M'
}
},
{
sku: 'abc',
attributes: {
material: 'cotton'
size: 'S'
}
}
]
Desired Result (if possible):
{
size: [' S', 'M', 'L']
}
If the desired result is not possible I would at least like to be able to get back [ 'size' ]
This process needs to be optimized as much as possible and I just can't seem to get a query quite right to return what I need; any help is greatly appreciated =)
Here is what I have so far
db.getCollection('myCollection').aggregate([
{ $match: {
_id: { $in: [ObjectId("55158b0bd6076278295cf022"), ObjectId("55158b0bd6076278295cf021"), ObjectId("55158b0bd6076278295cf01f") ] }
}
},
{ $project: { attributes: 1 }},
{ $group: { _id: '$attributes' } }
])
Which produces this output:
{
"result" : [
{
"_id" : {
"shirt_size" : "S",
"shirt_color" : "Blue",
"custom_attr" : "adsfasdf"
}
},
{
"_id" : {
"shirt_size" : "M",
"shirt_color" : "Green"
}
},
{
"_id" : {
"shirt_size" : "L",
"shirt_color" : "Red"
}
}
],
"ok" : 1.0000000000000000,
"$gleStats" : {
"lastOpTime" : Timestamp(1427475045, 1),
"electionId" : ObjectId("54f7c1edf8e5ff44cec194b6")
}
}
I feel like it is close and I am just missing the last step :(
I think you need to $unwind the array, then $group it and use $sum to count the appearances of each key; everything with a sum > 1 is a duplicate.
Links:
http://docs.mongodb.org/manual/reference/operator/aggregation/unwind/
http://docs.mongodb.org/manual/reference/operator/aggregation/group/
http://docs.mongodb.org/manual/reference/operator/aggregation/sum/
The $addToSet (aggregation) operator returns an array of unique values - http://docs.mongodb.org/manual/reference/operator/aggregation/addToSet/
Using the following aggregation (get unique sizes per Doc):
db.coll1.aggregate([
{$unwind : "$testdoc"},
{$group : {_id: "$_id", size: {$addToSet: "$testdoc.attributes.size"}}}
])
Gives the following result:
{
"result" : [
{
"_id" : ObjectId("551621fe6155a7741a0d328a"),
"size" : [
"M",
"L"
]
},
{
"_id" : ObjectId("551621fe6155a7741a0d328b"),
"size" : [
"L"
]
},
{
"_id" : ObjectId("551621fe6155a7741a0d3289"),
"size" : [
"S",
"M",
"L"
]
}
],
"ok" : 1
}
The following aggregation returns unique sizes across all docs:
db.coll1.aggregate([
{$unwind : "$testdoc"},
{$group :
{_id: "AllSizes", size: {$addToSet: "$testdoc.attributes.size"}}} ])
Result:
{
"result" : [
{
"_id" : "AllSizes",
"size" : [
"S",
"M",
"L"
]
}
],
"ok" : 1
}
Based on the following Docs:
> db.coll1.find().pretty()
{
"_id" : ObjectId("551621fe6155a7741a0d3289"),
"testdoc" : [
{
"sku" : "123",
"attributes" : {
"size" : "L",
"custom" : 7
}
},
{
"sku" : "456",
"attributes" : {
"size" : "M"
}
},
{
"sku" : "abc",
"attributes" : {
"material" : "cotton",
"size" : "S"
}
}
]
}
{
"_id" : ObjectId("551621fe6155a7741a0d328a"),
"testdoc" : [
{
"sku" : "123",
"attributes" : {
"size" : "L",
"custom" : 7
}
},
{
"sku" : "456",
"attributes" : {
"size" : "M"
}
},
{
"sku" : "abc",
"attributes" : {
"material" : "cotton",
"size" : "M"
}
}
]
}
{
"_id" : ObjectId("551621fe6155a7741a0d328b"),
"testdoc" : [
{
"sku" : "123",
"attributes" : {
"size" : "L",
"custom" : 7
}
},
{
"sku" : "456",
"attributes" : {
"size" : "L"
}
},
{
"sku" : "abc",
"attributes" : {
"material" : "cotton",
"size" : "L"
}
}
]
}

Aggregate of different subtypes in document of a collection

Given the following abstract document in collection md:
{
vals : [{
uid : string,
val : string|array
}]
}
the following, partially correct aggregation is given:
db.md.aggregate(
{ $unwind : "$vals" },
{ $match : { "vals.uid" : { $in : ["x", "y"] } } },
{
$group : {
_id : { uid : "$vals.uid" },
vals : { $addToSet : "$vals.val" }
}
}
);
that may lead to the following result:
"result" : [
{
"_id" : {
"uid" : "x"
},
"vals" : [
[
"24ad52bc-c414-4349-8f3a-24fd5520428e",
"e29dec2f-57d2-43dc-818a-1a6a9ec1cc64"
],
[
"5879b7a4-b564-433e-9a3e-49998dd60b67",
"24ad52bc-c414-4349-8f3a-24fd5520428e"
]
]
},
{
"_id" : {
"uid" : "y"
},
"vals" : [
"0da5fcaa-8d7e-428b-8a84-77c375acea2b",
"1721cc92-c4ee-4a19-9b2f-8247aa53cfe1",
"5ac71a9e-70bd-49d7-a596-d317b17e4491"
]
}
]
As x is the result aggregated over documents containing an array rather than a string, the vals in the result is an array of arrays. What I am looking for in this case is a flattened array (like the result for y).
It seems to me that what I want to achieve with a single aggregation call is currently not supported by any available operation, since, e.g., a type conversion cannot be done and $unwind expects an array as its input type in every case.
Is map-reduce the only option I have? If not ... any hints?
Thanks!
You can use the aggregation to do the computation you want without changing your schema (though you might consider changing your schema simply to make queries and aggregations of this field easier to write).
I broke up the pipeline into multiple steps for readability. I also simplified your document slightly, again for readability.
Sample input:
> db.md.find().pretty()
{
"_id" : ObjectId("512f65c6a31a92aae2a214a3"),
"uid" : "x",
"val" : "string"
}
{
"_id" : ObjectId("512f65c6a31a92aae2a214a4"),
"uid" : "x",
"val" : "string"
}
{
"_id" : ObjectId("512f65c6a31a92aae2a214a5"),
"uid" : "y",
"val" : "string2"
}
{
"_id" : ObjectId("512f65e8a31a92aae2a214a6"),
"uid" : "y",
"val" : [
"string3",
"string4"
]
}
{
"_id" : ObjectId("512f65e8a31a92aae2a214a7"),
"uid" : "z",
"val" : [
"string"
]
}
{
"_id" : ObjectId("512f65e8a31a92aae2a214a8"),
"uid" : "y",
"val" : [
"string1",
"string2"
]
}
Pipeline stages:
> project1 = {
"$project" : {
"uid" : 1,
"val" : 1,
"isArray" : {
"$cond" : [
{
"$eq" : [
"$val.0",
[ ]
]
},
true,
false
]
}
}
}
> project2 = {
"$project" : {
"uid" : 1,
"valA" : {
"$cond" : [
"$isArray",
"$val",
[
null
]
]
},
"valS" : {
"$cond" : [
"$isArray",
null,
"$val"
]
},
"isArray" : 1
}
}
> unwind = { "$unwind" : "$valA" }
> project3 = {
"$project" : {
"_id" : 0,
"uid" : 1,
"val" : {
"$cond" : [
"$isArray",
"$valA",
"$valS"
]
}
}
}
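Final aggregation (the group stage is not defined above; judging by the result below, it is presumably a $group on uid that collects val with $addToSet):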
> db.md.aggregate(project1, project2, unwind, project3, group)
{
"result" : [
{
"_id" : "z",
"vals" : [
"string"
]
},
{
"_id" : "y",
"vals" : [
"string1",
"string4",
"string3",
"string2"
]
},
{
"_id" : "x",
"vals" : [
"string"
]
}
],
"ok" : 1
}
If you modify your schema so that the "vals.val" field is always an array (even when the record contains only one element), you can do it easily as follows:
db.test_col.insert({
vals : [
{
uid : "uuid1",
val : ["value1"]
},
{
uid : "uuid2",
val : ["value2", "value3"]
}]
});
db.test_col.insert(
{
vals : [{
uid : "uuid2",
val : ["value4", "value5"]
}]
});
Using this approach you only need to use two $unwind operations: one unwinds the "parent" array and the second unwinds every "vals.val" value. So, querying like
db.test_col.aggregate(
{ $unwind : "$vals" },
{ $unwind : "$vals.val" },
{
$group : {
_id : { uid : "$vals.uid" },
vals : { $addToSet : "$vals.val" }
}
}
);
You can obtain your expected value:
{
"result" : [
{
"_id" : {
"uid" : "uuid2"
},
"vals" : [
"value5",
"value4",
"value3",
"value2"
]
},
{
"_id" : {
"uid" : "uuid1"
},
"vals" : [
"value1"
]
}
],
"ok" : 1
}
And no, you can't execute this query using your current schema, since $unwind fails when the field isn't an array field.