Project only some fields of array items in sub document - mongodb

How can I project only particular fields of items in array in sub document?
Consider the following (simplified) example:
{
"_id" : ObjectId("573d70df080cc2cbe8bf3222"),
"name" : "Nissan",
"models" : [
{
"name" : "Altima",
"body" : {
"type" : 2,
"maxprice" : 31800.00,
"minprice" : 21500.00
}
},
{
"name" : "Maxima",
"body" : {
"type" : 2,
"maxprice" : 39200.00,
"minprice" : 28800.00
}
}
]
},
{
"_id" : ObjectId("80cc2cbe8bf3222573d70df0"),
"name" : "Honda",
"models" : [
{
"name" : "Accord",
"body" : {
"type" : 2,
"maxprice" : 34100.00,
"minprice" : 20400.00
}
},
{
"name" : "Civic",
"body" : {
"type" : 3,
"maxprice" : 27900.00,
"minprice" : 19800.00
}
}
]
}
After aggregation, I'd like to get the following output:
{
"_id" : ObjectId("573d70df080cc2cbe8bf3222"),
"name" : "Nissan",
"models" : [
{
"type" : 2,
"minprice" : 21500.00
},
{
"type" : 2,
"minprice" : 28800.00
}
]
},
{
"_id" : ObjectId("80cc2cbe8bf3222573d70df0"),
"name" : "Honda",
"models" : [
{
"type" : 2,
"minprice" : 20400.00
},
{
"type" : 3,
"minprice" : 19800.00
}
]
}
So it basically gets all documents, all fields of documents, all items in models array, BUT only some fields of the array items in models. Please help.

You need to $project the "models" field using the $map operator.
db.collection.aggregate([
{ "$project": {
"name": 1,
"models": {
"$map": {
"input": "$models",
"as": "m",
"in": {
"type": "$$m.body.type",
"minprice": "$$m.body.minprice"
}
}
}
}}
])

$unwind is your friend
First you can basically filter the (non nested) fields you want.
var projection = {$project:{name:'$name', models:'$models'}};
db.dum.aggregate(projection)
Foreach of your models, you issue a document
var unwindModels = {$unwind:{'$models'}}
db.dum.aggregate(projection, unwindModels)
The idea is that every document issued from your models field will be regrouped later on via the _id field.
Foreach document, you only keep the (sub)fields you want
var keepSubFields = {$project:{name:'$name', type:'$models.body.type', minprice:'$models.body.minprice'}}
db.dum.aggregate(projection, unwindModels, keepSubFields)
Then you reaggregate your models as an array (thanks to the _id of each record which tracks the original record)
var aggregateModels = {$group:{_id:'$_id', name:{$last:'$name'}, models:{$push:{type:'$type', minprice:'$minprice'}}}}
db.dum.aggregate(projection, unwindModels, keepSubFields, aggregateModels)
note1: Here we can use $last because our primary key is not _id but <_id, name>. ($first would be good too)
note2: we refer type by $type, because when you iterate the collection on the aggregateModels stage, your record is of the form
<_id, name, type, minprice>

Related

Embed root field in a subdocument within an aggregation pipeline

Maybe someone can help me with Mongo's Aggregation Pipeline. I am trying to put an object in another object but I'm new to Mongo and ist very difficult:
{
"_id" : ObjectId("5888a74f137ed66828367585"),
"name" : "Unis",
"tags" : [...],
"editable" : true,
"token" : "YfFzaoNvWPbvyUmSulXfMPq4a9QgGxN1ElIzAUmSJRX4cN7zCl",
"columns" : [...],
"description" : "...",
"sites" : {
"_id" : ObjectId("5888ae2f137ed668fb95a03d"),
"url" : "www.....de",
"column_values" : [
"University XXX",
"XXX",
"false"
],
"list_id" : ObjectId("5888a74f137ed66828367585")
},
"scan" : [
{
"_id" : ObjectId("5888b1074e2123c22ae7f4d3"),
"site_id" : ObjectId("5888ae2f137ed668fb95a03d"),
"scan_group_id" : ObjectId("5888a970a7f75fbd49052ed6"),
"date" : ISODate("2017-01-18T16:00:00Z"),
"score" : "B",
"https" : false,
"cookies" : 12
}
]
}
I want to put every object in the "scan"-array into "sites". So that it looks like this:
{
"_id" : ObjectId("5888a74f137ed66828367585"),
"name" : "Unis",
"tags" : [...],
"editable" : true,
"token" : "YfFzaoNvWPbvyUmSulXfMPq4a9QgGxN1ElIzAUmSJRX4cN7zCl",
"columns" : [...],
"description" : "...",
"sites" : {
"_id" : ObjectId("5888ae2f137ed668fb95a03d"),
"url" : "www.....de",
"column_values" : [
"University XXX",
"XXX",
"false"
],
"list_id" : ObjectId("5888a74f137ed66828367585"),
"scan" : [
{
"_id" : ObjectId("5888b1074e2123c22ae7f4d3"),
"site_id" : ObjectId("5888ae2f137ed668fb95a03d"),
"scan_group_id" : ObjectId("5888a970a7f75fbd49052ed6"),
"date" : ISODate("2017-01-18T16:00:00Z"),
"score" : "B",
"https" : false,
"cookies" : 12
}
]
}
}
Is there a step in the aggregation pipeline to perform this task?
With a single pipeline I don't see any other way but specifying each field individually as:
db.collection.aggregate([
{
"$project": {
"name": 1, "tags": 1,
"editable": 1,
"token": 1, "columns": 1,
"description": 1,
"sites._id": "$sites._id",
"sites.url": "$sites.url" ,
"sites.column_values": "$sites.column_values" ,
"sites.list_id": "$sites.list_id",
"sites.scan": "$scan"
}
}
])
With MongoDB 3.4 and newer, you can use the $addFields pipeline step instead of specifying all fields using $project. The advantage is that it adds new fields to documents and outputs documents that contain all existing fields from the input documents and the newly added fields:
db.collection.aggregate([
{
"$addFields": {
"sites._id": "$sites._id",
"sites.url": "$sites.url" ,
"sites.column_values": "$sites.column_values" ,
"sites.list_id": "$sites.list_id",
"sites.scan": "$scan"
}
}, { "$project": { "scan": 0 } }
])

Reference multiple fields with aggregation function

Let's say I have some mongo DB query which returns following two documents. (I am using aggregation & projection which returns me this result set).
{
"name" : {
"value" : "ANDERSON"
},
"ID" : {
"value" : "2356"
},
}
{
"employeename" : {
"value" : "DAVID"
},
"ID" : {
"value" : "2356"
},
}
My DB is schema less & I am storing attributes and there values. There are multiple attributes which represents the same information. For e.g. here "name" & "employeename" represents the same thing. I want the final output in some common attribute (say "Employee Name"). This common attribute can have value either from "name" or "employeename".
I think this problem can be solved by adding one more pipe in with the aggregation. I tried $or (it returns true/false not the value)
db.getCollection('mycollection').aggregate([
{ "$project" : {
"name" : 1,
"ID" : 1, "employeename" : 1
}},
{ "$project":{
"Employee Name": {$or : ["$name", "$employeename"]}
}}
])
Final Output should be
{
" Employee Name" : {
"value" : "ANDERSON"
},
"ID" : {
"value" : "2356"
},
}
{
" Employee Name" : {
"value" : "DAVID"
},
"ID" : {
"value" : "2356"
},
}
Can somebody tell me how to write this mongo DB command?
What you want is the $ifNull operator, you can also shorten your pipeline to one $project stage.
db.getCollection('mycollection').aggregate([
{ "$project" : {
"EmployeeName" : { "$ifNull": [ "$name", "$employeename" ] },
"ID" : 1,
}}
])

Find from another find in mongodb

In Mysql I can do a Select from another Select.
So I would ask if I can do the same in Mongodb.
For more explanation, I need to retreive the transaction of the a specific userOwner with just the last dateTransaction in the history object of this collection.So if this userOwner isn't in the last history we shoudn't retreive this transaction.
I used at first this query:
#Query(value = "{'history':{'$elemMatch':{'userOwner': ?0}}}")
but it returns all the elements even those where this userOwner isn't with the last "dateTransaction".
So I aim to de a query that it returns for each transaction just the last dateTransaction with userOwner of the "history" array :
.find({},{dateTransaction: {$slice: 1} }).limit(1)
and after do another query from this one.
Anyone has an idea or examples.
Tnx
This is my Collection called "piece":
{
"_id" : ObjectId("1"),
"history" : [{
"userOwner" : "3",
"dateTransaction" : ISODate("2016-05-30T00:00:00.000+0000"),
}, {
"userOwner" : "1",
"dateTransaction" : ISODate("2016-05-26T00:00:00.000+0000"),
}, {
"userOwner" : "2",
"dateTransaction" : ISODate("2016-05-23T00:00:00.000+0000"),
}
]
}{
"_id" : ObjectId("2"),
"transactions" : [{
"userOwner" : "2",
"dateTransaction" : ISODate("2016-05-26T00:00:00.000+0000"),
}, {
"userOwner" : "3",
"dateTransaction" : ISODate("2016-05-15T00:00:00.000+0000"),
}
]
}{
"_id" : ObjectId("3"),
"transactions" : [{
"userOwner" : "2",
"dateTransaction" : ISODate("2016-05-26T00:00:00.000+0000"),
}, {
"userOwner" : "1",
"dateTransaction" : ISODate("2016-05-15T00:00:00.000+0000"),
}
]
}
As example the result for the userOwner 2 should be :
{
"_id" : ObjectId("2"),
"transactions" : [{
"userOwner" : "2",
"dateTransaction" : ISODate("2016-05-26T00:00:00.000+0000"),
}, {
"userOwner" : "3",
"dateTransaction" : ISODate("2016-05-15T00:00:00.000+0000"),
}
]
}{
"_id" : ObjectId("3"),
"transactions" : [{
"userOwner" : "2",
"dateTransaction" : ISODate("2016-05-26T00:00:00.000+0000"),
}, {
"userOwner" : "1",
"dateTransaction" : ISODate("2016-05-15T00:00:00.000+0000"),
}
]
}
it looks like your data is stored in one collection - so this is a kind of bad design as it needs more overhead to work with....
below aggregation query which has a lookup to the same collection to match data for user:2
var unwind = {
$unwind : "$history"
}
var matchUser = {
$match : {
"history.userOwner" : "2"
}
}
var lookUp = {
$lookup : {
from : "shm",
localField : "userOwner",
foreignField : "userOwner",
as : "t"
}
}
var unwindTransactions = {
$unwind : "$t"
}
var unwindTransactions2 = {
$unwind : "$t.transactions"
}
var match2 = {
$match : {
"t.transactions.userOwner" : "2"
}
}
var project = {
$project : {
_id : 0,
recordId : "$_id",
transactionId : "$t._id",
dateOfTransaction : "$history.dateTransaction",
userOwner : "$history.userOwner",
}
}
db.shm.aggregate([unwind,
matchUser,
lookUp,
unwindTransactions,
unwindTransactions2,
match2,
project
])
and as a result we have two records of user transactions
{
"recordId" : ObjectId("575e7e8b852cb76369c9e446"),
"transactionId" : ObjectId("575e7e8b852cb76369c9e447"),
"dateOfTransaction" : ISODate("2016-05-23T00:00:00.000Z"),
"userOwner" : "2"
},{
"recordId" : ObjectId("575e7e8b852cb76369c9e446"),
"transactionId" : ObjectId("575e7e8b852cb76369c9e448"),
"dateOfTransaction" : ISODate("2016-05-23T00:00:00.000Z"),
"userOwner" : "2"
}
Please consider split of that collection as in current form will give you a lot of headache when processing more complex queries

In MongoDb, how to apply sort internal fields present in document?

My document looks like this
{
field1: somevalue,
name:xtz
nested_documents: [ // array of nested document
{ x:"1", y:"2" }, // first nested document
{ x:"2", y:"3" }, // second nested document
{ x:"-1", y:"3" }, // second nested document
// ...many more nested documents
]
}
How one can sort the data present in nested_documents?
Expected answer is shown below:
nested_documents: [ { x:"-1", y:"3" },{ x:"1", y:"2" },{ x:"2", y:"3" }]
To do this you would have to use the aggregation framework
db.test.aggregate([{$unwind:'$nested_documents'},{$sort:{'nested_documents.x':
1}}])
this returns
"result" : [
{
"_id" : ObjectId("5139ba3dcd4e11c83f4cea12"),
"field1" : "somevalue",
"name" : "xtz",
"nested_documents" : {
"x" : "-1",
"y" : "3"
}
},
{
"_id" : ObjectId("5139ba3dcd4e11c83f4cea12"),
"field1" : "somevalue",
"name" : "xtz",
"nested_documents" : {
"x" : "1",
"y" : "2"
}
},
{
"_id" : ObjectId("5139ba3dcd4e11c83f4cea12"),
"field1" : "somevalue",
"name" : "xtz",
"nested_documents" : {
"x" : "2",
"y" : "3"
}
}
],
"ok" : 1
Hope this helps

Aggregate of different subtypes in document of a collection

abstract document in collection md given:
{
vals : [{
uid : string,
val : string|array
}]
}
the following, partially correct aggregation is given:
db.md.aggregate(
{ $unwind : "$vals" },
{ $match : { "vals.uid" : { $in : ["x", "y"] } } },
{
$group : {
_id : { uid : "$vals.uid" },
vals : { $addToSet : "$vals.val" }
}
}
);
that may lead to the following result:
"result" : [
{
"_id" : {
"uid" : "x"
},
"vals" : [
[
"24ad52bc-c414-4349-8f3a-24fd5520428e",
"e29dec2f-57d2-43dc-818a-1a6a9ec1cc64"
],
[
"5879b7a4-b564-433e-9a3e-49998dd60b67",
"24ad52bc-c414-4349-8f3a-24fd5520428e"
]
]
},
{
"_id" : {
"uid" : "y"
},
"vals" : [
"0da5fcaa-8d7e-428b-8a84-77c375acea2b",
"1721cc92-c4ee-4a19-9b2f-8247aa53cfe1",
"5ac71a9e-70bd-49d7-a596-d317b17e4491"
]
}
]
as x is the result aggregated on documents containing an array rather than a string, the vals in the result is an array of arrays. what i look for in this case is to have a flattened array (like the result for y).
for me it seems like that what i want to achieve by one aggegration call only, is currently not supported by any given operation as e.g. a type conversion cannot be done or unwind expectes in every case an array as input type.
is map reduce the only option i have? if not ... any hints?
thanks!
You can use the aggregation to do the computation you want without changing your schema (though you might consider changing your schema simply to make queries and aggregations of this field easier to write).
I broke up the pipeline into multiple steps for readability. I also simplified your document slightly, again for readability.
Sample input:
> db.md.find().pretty()
{
"_id" : ObjectId("512f65c6a31a92aae2a214a3"),
"uid" : "x",
"val" : "string"
}
{
"_id" : ObjectId("512f65c6a31a92aae2a214a4"),
"uid" : "x",
"val" : "string"
}
{
"_id" : ObjectId("512f65c6a31a92aae2a214a5"),
"uid" : "y",
"val" : "string2"
}
{
"_id" : ObjectId("512f65e8a31a92aae2a214a6"),
"uid" : "y",
"val" : [
"string3",
"string4"
]
}
{
"_id" : ObjectId("512f65e8a31a92aae2a214a7"),
"uid" : "z",
"val" : [
"string"
]
}
{
"_id" : ObjectId("512f65e8a31a92aae2a214a8"),
"uid" : "y",
"val" : [
"string1",
"string2"
]
}
Pipeline stages:
> project1 = {
"$project" : {
"uid" : 1,
"val" : 1,
"isArray" : {
"$cond" : [
{
"$eq" : [
"$val.0",
[ ]
]
},
true,
false
]
}
}
}
> project2 = {
"$project" : {
"uid" : 1,
"valA" : {
"$cond" : [
"$isArray",
"$val",
[
null
]
]
},
"valS" : {
"$cond" : [
"$isArray",
null,
"$val"
]
},
"isArray" : 1
}
}
> unwind = { "$unwind" : "$valA" }
> project3 = {
"$project" : {
"_id" : 0,
"uid" : 1,
"val" : {
"$cond" : [
"$isArray",
"$valA",
"$valS"
]
}
}
}
Final aggregation:
> db.md.aggregate(project1, project2, unwind, project3, group)
{
"result" : [
{
"_id" : "z",
"vals" : [
"string"
]
},
{
"_id" : "y",
"vals" : [
"string1",
"string4",
"string3",
"string2"
]
},
{
"_id" : "x",
"vals" : [
"string"
]
}
],
"ok" : 1
}
If you modify your schema using always "vals.val" field as an array field (even when the record contains only one element) you can do it easily as follows:
db.test_col.insert({
vals : [
{
uid : "uuid1",
val : ["value1"]
},
{
uid : "uuid2",
val : ["value2", "value3"]
}]
});
db.test_col.insert(
{
vals : [{
uid : "uuid2",
val : ["value4", "value5"]
}]
});
Using this approach you only need to use two $unwind operations: one unwinds the "parent" array and the second unwinds every "vals.val" value. So, querying like
db.test_col.aggregate(
{ $unwind : "$vals" },
{ $unwind : "$vals.val" },
{
$group : {
_id : { uid : "$vals.uid" },
vals : { $addToSet : "$vals.val" }
}
}
);
You can obtain your expected value:
{
"result" : [
{
"_id" : {
"uid" : "uuid2"
},
"vals" : [
"value5",
"value4",
"value3",
"value2"
]
},
{
"_id" : {
"uid" : "uuid1"
},
"vals" : [
"value1"
]
}
],
"ok" : 1
}
And no, you can't execute this query using your current schema, since $unwind fails when the field isn't an array field.