Embed root field in a subdocument within an aggregation pipeline - mongodb

Maybe someone can help me with Mongo's Aggregation Pipeline. I am trying to put an object in another object but I'm new to Mongo and ist very difficult:
{
"_id" : ObjectId("5888a74f137ed66828367585"),
"name" : "Unis",
"tags" : [...],
"editable" : true,
"token" : "YfFzaoNvWPbvyUmSulXfMPq4a9QgGxN1ElIzAUmSJRX4cN7zCl",
"columns" : [...],
"description" : "...",
"sites" : {
"_id" : ObjectId("5888ae2f137ed668fb95a03d"),
"url" : "www.....de",
"column_values" : [
"University XXX",
"XXX",
"false"
],
"list_id" : ObjectId("5888a74f137ed66828367585")
},
"scan" : [
{
"_id" : ObjectId("5888b1074e2123c22ae7f4d3"),
"site_id" : ObjectId("5888ae2f137ed668fb95a03d"),
"scan_group_id" : ObjectId("5888a970a7f75fbd49052ed6"),
"date" : ISODate("2017-01-18T16:00:00Z"),
"score" : "B",
"https" : false,
"cookies" : 12
}
]
}
I want to put every object in the "scan"-array into "sites". So that it looks like this:
{
"_id" : ObjectId("5888a74f137ed66828367585"),
"name" : "Unis",
"tags" : [...],
"editable" : true,
"token" : "YfFzaoNvWPbvyUmSulXfMPq4a9QgGxN1ElIzAUmSJRX4cN7zCl",
"columns" : [...],
"description" : "...",
"sites" : {
"_id" : ObjectId("5888ae2f137ed668fb95a03d"),
"url" : "www.....de",
"column_values" : [
"University XXX",
"XXX",
"false"
],
"list_id" : ObjectId("5888a74f137ed66828367585"),
"scan" : [
{
"_id" : ObjectId("5888b1074e2123c22ae7f4d3"),
"site_id" : ObjectId("5888ae2f137ed668fb95a03d"),
"scan_group_id" : ObjectId("5888a970a7f75fbd49052ed6"),
"date" : ISODate("2017-01-18T16:00:00Z"),
"score" : "B",
"https" : false,
"cookies" : 12
}
]
}
}
Is there a step in the aggregation pipeline to perform this task?

With a single pipeline I don't see any other way but specifying each field individually as:
db.collection.aggregate([
{
"$project": {
"name": 1, "tags": 1,
"editable": 1,
"token": 1, "columns": 1,
"description": 1,
"sites._id": "$sites._id",
"sites.url": "$sites.url" ,
"sites.column_values": "$sites.column_values" ,
"sites.list_id": "$sites.list_id",
"sites.scan": "$scan"
}
}
])
With MongoDB 3.4 and newer, you can use the $addFields pipeline step instead of specifying all fields using $project. The advantage is that it adds new fields to documents and outputs documents that contain all existing fields from the input documents and the newly added fields:
db.collection.aggregate([
{
"$addFields": {
"sites._id": "$sites._id",
"sites.url": "$sites.url" ,
"sites.column_values": "$sites.column_values" ,
"sites.list_id": "$sites.list_id",
"sites.scan": "$scan"
}
}, { "$project": { "scan": 0 } }
])

Related

Bring nested value to top level in MongoDB

I want to reshape a MongoDB document as followed:
This is how it looks at the moment:
{
"_id" : ObjectId("5a96d4a0af792cca1ec7d8cb"),
"Test" : "ABC",
"DATE" : "2018-02-28T16:04:55.00+01:00",
"Header" : "212735699",
"TraceIds" : [
{
"Id" : 1,
"Names" : [
{
"LangCode" : "de",
"CountryCode" : "DE",
"Text" : "Komponente"
},
{
"LangCode" : "en",
"CountryCode" : "US",
"Text" : "Component"
},
],
"Values" : [
{
"AId" : 1,
"Names" : [
{
"LangCode" : "de",
"CountryCode" : "DE",
"Text" : "Teil"
},
{
"LangCode" : "en",
"CountryCode" : "US",
"Text" : "Part"
},
],
"Value" : "1263000118"
},
]
}
],
Now I want to bring up the Value 1263000118 and give it the german name "Teil". All the other embedded values should vanish. So it should look like this:
{"_id" : ObjectId("5a96d4a0af792cca1ec7d8cb"),
"Test" : "ABC",
"DATE" : "2018-02-28T16:04:55.00+01:00",
"Header" : "212735699",
"Teil" : "1263000118",}
Would be great if someone could help me out here. Thanks
Use the aggregation framework :
db.collection.aggregate([
{$match: {
_id: your_id,
// Treatment to select which nested value you want
}},
{$project: {
_id: "$_id",
Test: "$Test",
DATE: "$DATE",
Header: "$Header",
Teil: "$Values.$.Value
}}
]);
Something like this should work

Mongodb : get whether a document is the latest with a field value and filter on the result

I am trying to port an existing SQL schema into Mongo.
We have document tables, with sometimes several times the same document, with a different revision but the same reference. I want to get only the latest revisions of the documents.
A sample input data:
{
"Uid" : "xxx",
"status" : "ACCEPTED",
"reference" : "DOC305",
"code" : "305-D",
"title" : "Document 305",
"creationdate" : ISODate("2011-11-24T15:13:28.887Z"),
"creator" : "X"
},
{
"Uid" : "xxx",
"status" : "COMMENTED",
"reference" : "DOC306",
"code" : "306-A",
"title" : "Document 306",
"creationdate" : ISODate("2011-11-28T07:23:18.807Z"),
"creator" : "X"
},
{
"Uid" : "xxx",
"status" : "COMMENTED",
"reference" : "DOC306",
"code" : "306-B",
"title" : "Document 306",
"creationdate" : ISODate("2011-11-28T07:26:49.447Z"),
"creator" : "X"
},
{
"Uid" : "xxx",
"status" : "ACCEPTED",
"reference" : "DOC501",
"code" : "501-A",
"title" : "Document 501",
"creationdate" : ISODate("2011-11-19T06:30:35.757Z"),
"creator" : "X"
},
{
"Uid" : "xxx",
"status" : "ACCEPTED",
"reference" : "DOC501",
"code" : "501-B",
"title" : "Document 501",
"creationdate" : ISODate("2011-11-19T06:40:32.957Z"),
"creator" : "X"
}
Given this data, I want this result set (sometimes I want only the last revision, sometimes I want all revisions with an attribute telling me whether it's the latest):
{
"Uid" : "xxx",
"status" : "ACCEPTED",
"reference" : "DOC305",
"code" : "305-D",
"title" : "Document 305",
"creationdate" : ISODate("2011-11-24T15:13:28.887Z"),
"creator" : "X",
"lastrev" : true
},
{
"Uid" : "xxx",
"status" : "COMMENTED",
"reference" : "DOC306",
"code" : "306-B",
"title" : "Document 306",
"creationdate" : ISODate("2011-11-28T07:26:49.447Z"),
"creator" : "X",
"lastrev" : true
},
{
"Uid" : "xxx",
"status" : "ACCEPTED",
"reference" : "DOC501",
"code" : "501-B",
"title" : "Document 501",
"creationdate" : ISODate("2011-11-19T06:40:32.957Z"),
"creator" : "X",
"lastrev" : true
}
I already have a bunch of filters, sorting, and skip/limit (for pagination of data), so the final result set should be mindful of these constraints.
The current "find" query (built with the .Net driver), which filters fine but gives me all revisions of each document:
coll.find(
{ "$and" : [
{ "$or" : [
{ "deletedid" : { "$exists" : false } },
{ "deletedid" : null }
] },
{ "$or" : [
{ "taskid" : { "$exists" : false } },
{ "taskid" : null }
] },
{ "objecttypeuid" : { "$in" : ["xxxxx"] } }
] },
{ "_id" : 0, "Uid" : 1, "lastrev" : 1, "title" : 1, "code" : 1, "creator" : 1, "owner" : 1, "modificator" : 1, "status" : 1, "reference": 1, "creationdate": 1 }
).sort({ "creationdate" : 1 }).skip(0).limit(10);
Using another question, I have been able to build this aggregation, which gives me the latest revision of each document, but with not enough attributes in the result:
coll.aggregate([
{ $sort: { "creationdate": 1 } },
{
$group: {
"_id": "$reference",
result: { $last: "$creationdate" },
creationdate: { $last: "$creationdate" }
}
}
]);
I would like to integrating the aggregate with the find query.
I have found the way to mix aggregation and filtering:
coll.aggregate(
[
{ $match: {
"$and" : [
{ "$or" : [
{ "deletedid" : { "$exists" : false } },
{ "deletedid" : null }
] },
{ "$or" : [
{ "taskid" : { "$exists" : false } },
{ "taskid" : null }
] },
{ "objecttypeuid" : { "$in" : ["xxx"] } }
]
}
},
{ $sort: { "creationdate": 1 } },
{ $group: {
"_id": "$reference",
"doc": { "$last": "$$ROOT" }
}
},
{ $sort: { "doc.creationdate": 1 } },
{ $skip: skip },
{ $limit: limit }
],
{ allowDiskUse: true }
);
For each result node, this gives me a "doc" node with the document data. It has too much data still (it's missing projections), but it's a start.
Translated in .Net:
FilterDefinitionBuilder<BsonDocument> filterBuilder = Builders<BsonDocument>.Filter;
FilterDefinition<BsonDocument> filters = filterBuilder.Empty;
filters = filters & (filterBuilder.Not(filterBuilder.Exists("deletedid")) | filterBuilder.Eq("deletedid", BsonNull.Value));
filters = filters & (filterBuilder.Not(filterBuilder.Exists("taskid")) | filterBuilder.Eq("taskid", BsonNull.Value));
foreach (var f in fieldFilters) {
filters = filters & filterBuilder.In(f.Key, f.Value);
}
var sort = Builders<BsonDocument>.Sort.Ascending(orderby);
var group = new BsonDocument {
{ "_id", "$reference" },
{ "doc", new BsonDocument("$last", "$$ROOT") }
};
var aggregate = coll.Aggregate(new AggregateOptions { AllowDiskUse = true })
.Match(filters)
.Sort(sort)
.Group(group)
.Sort(sort)
.Skip(skip)
.Limit(rows);
return aggregate.ToList();
I'm pretty sure there are better ways to do this, though.
You answer is pretty close. Instead of $last, $max is better.
About $last operator:
Returns the value that results from applying an expression to the last document in a group of documents that share the same group by a field. Only meaningful when documents are in a defined order.
Get the last revision in each group, see code below in mongo shell:
db.collection.aggregate([
{
$group: {
_id: '$reference',
doc: {
$max: {
"creationdate" : "$creationdate",
"code" : "$code",
"Uid" : "$Uid",
"status" : "$status",
"title" : "$title",
"creator" : "$creator"
}
}
}
},
{
$project: {
_id: 0,
Uid: "$doc.Uid",
status: "$doc.status",
reference: "$_id",
code: "$doc.code",
title: "$doc.title",
creationdate: "$doc.creationdate",
creator: "$doc.creator"
}
}
]).pretty()
The output as your expect:
{
"Uid" : "xxx",
"status" : "ACCEPTED",
"reference" : "DOC501",
"code" : "501-B",
"title" : "Document 501",
"creationdate" : ISODate("2011-11-19T06:40:32.957Z"),
"creator" : "X"
}
{
"Uid" : "xxx",
"status" : "COMMENTED",
"reference" : "DOC306",
"code" : "306-B",
"title" : "Document 306",
"creationdate" : ISODate("2011-11-28T07:26:49.447Z"),
"creator" : "X"
}
{
"Uid" : "xxx",
"status" : "ACCEPTED",
"reference" : "DOC305",
"code" : "305-D",
"title" : "Document 305",
"creationdate" : ISODate("2011-11-24T15:13:28.887Z"),
"creator" : "X"
}

Project only some fields of array items in sub document

How can I project only particular fields of items in array in sub document?
Consider the following (simplified) example:
{
"_id" : ObjectId("573d70df080cc2cbe8bf3222"),
"name" : "Nissan",
"models" : [
{
"name" : "Altima",
"body" : {
"type" : 2,
"maxprice" : 31800.00,
"minprice" : 21500.00
}
},
{
"name" : "Maxima",
"body" : {
"type" : 2,
"maxprice" : 39200.00,
"minprice" : 28800.00
}
}
]
},
{
"_id" : ObjectId("80cc2cbe8bf3222573d70df0"),
"name" : "Honda",
"models" : [
{
"name" : "Accord",
"body" : {
"type" : 2,
"maxprice" : 34100.00,
"minprice" : 20400.00
}
},
{
"name" : "Civic",
"body" : {
"type" : 3,
"maxprice" : 27900.00,
"minprice" : 19800.00
}
}
]
}
After aggregation, I'd like to get the following output:
{
"_id" : ObjectId("573d70df080cc2cbe8bf3222"),
"name" : "Nissan",
"models" : [
{
"type" : 2,
"minprice" : 21500.00
},
{
"type" : 2,
"minprice" : 28800.00
}
]
},
{
"_id" : ObjectId("80cc2cbe8bf3222573d70df0"),
"name" : "Honda",
"models" : [
{
"type" : 2,
"minprice" : 20400.00
},
{
"type" : 3,
"minprice" : 19800.00
}
]
}
So it basically gets all documents, all fields of documents, all items in models array, BUT only some fields of the array items in models. Please help.
You need to $project the "models" field using the $map operator.
db.collection.aggregate([
{ "$project": {
"name": 1,
"models": {
"$map": {
"input": "$models",
"as": "m",
"in": {
"type": "$$m.body.type",
"minprice": "$$m.body.minprice"
}
}
}
}}
])
$unwind is your friend
First you can basically filter the (non nested) fields you want.
var projection = {$project:{name:'$name', models:'$models'}};
db.dum.aggregate(projection)
Foreach of your models, you issue a document
var unwindModels = {$unwind:{'$models'}}
db.dum.aggregate(projection, unwindModels)
The idea is that every document issued from your models field will be regrouped later on via the _id field.
Foreach document, you only keep the (sub)fields you want
var keepSubFields = {$project:{name:'$name', type:'$models.body.type', minprice:'$models.body.minprice'}}
db.dum.aggregate(projection, unwindModels, keepSubFields)
Then you reaggregate your models as an array (thanks to the _id of each record which tracks the original record)
var aggregateModels = {$group:{_id:'$_id', name:{$last:'$name'}, models:{$push:{type:'$type', minprice:'$minprice'}}}}
db.dum.aggregate(projection, unwindModels, keepSubFields, aggregateModels)
note1: Here we can use $last because our primary key is not _id but <_id, name>. ($first would be good too)
note2: we refer type by $type, because when you iterate the collection on the aggregateModels stage, your record is of the form
<_id, name, type, minprice>

Mongodb aggregate match array item with child array item

I would like to find documents that contains specific values in a child array.
This is an example document:
{
"_id" : ObjectId("52e9658e2a13df5be22cf7dc"),
"desc" : "Something somethingson",
"imageurl" : "http://",
"tags" : [
{
"y" : 29.3,
"brand" : "52d2cecd0bd1bd844d000018",
"brandname" : "Zara",
"type" : "Bow Tie",
"x" : 20,
"color" : "52d50c19f8f8ca8448000001",
"number" : 0,
"season" : 0,
"cloth" : "52d50d57f8f8ca8448000006"
},
{
"y" : 29.3,
"brand" : "52d2cecd0bd1bd844d000018",
"brandname" : "Zara",
"type" : "Bow Tie",
"x" : 20,
"color" : "52d50c19f8f8ca8448000001",
"number" : 0,
"season" : 0,
"cloth" : "52d50d57f8f8ca8448000006"
}
],
"user_id" : "52e953942a13df5be22cf7af",
"username" : "Thompson",
"created" : 1386710259971,
"occasion" : "ID",
"sex" : 0
}
The query I would like to do should look something like this:
db.posts.aggregate([
{$match: {tags.color:"52d50c19f8f8ca8448000001", tags.brand:"52d2cecd0bd1bd844d000018", occasion: "ID"}},
{$sort:{"created":-1}},
{$skip:0},
{$limit:10}
])
my problem is that I dont know how to match anything inside an array in the document like "tags". How can I do this?
You could try to do it without aggregation framework:
db.posts.find(
{
occasion: "ID",
tags: { $elemMatch: { color:"52d50c19f8f8ca8448000001", brand:"52d2cecd0bd1bd844d000018" } }
}
).sort({created: -1}).limit(10)
And if you want to use aggregation:
db.posts.aggregate([
{$match:
{
tags: { $elemMatch: { color:"52d50c19f8f8ca8448000001", brand: "52d2cecd0bd1bd844d000018" } },
occasion: "ID"
}
},
{$sort:{"created":-1}},
{$limit:10}
])

mongodb how to retrieve all the value in fields type array?

Is there a way to retrieve all the values
of a fields type array
ie
{ "slug" : "my-post", "status" : "publish", "published" : ISODate("2014-01-26T18:28:11Z"), "title" : "my post", "body" : "my body post", "_id" : ObjectId("52e553c937fb8bf218b8c624"), "tags" : [ "js", "php", "scala" ], "created" : ISODate("2014-01-26T18:28:25.298Z"), "author" : "whisher", "__v" : 0 }
{ "slug" : "my-post-2", "status" : "publish", "published" : ISODate("2014-01-26T18:28:27Z"), "title" : "my post 2", "body" : "spost body", "_id" : ObjectId("52e5540837fb8bf218b8c625"), "tags" : [ "android", "actionscript", "java" ], "created" : ISODate("2014-01-26T18:29:28.915Z"), "author" : "whisher", "__v" : 0 }
the result should be like
"android", "actionscript", "java","js", "php", "scala"
You can $unwind, and then $group them back
db.collection.aggregate({ $unwind : "$tags" }, {$group:{_id: "$tags"}});
The result would be
{ _id: "android"},
{ _id: "actionscript"},
{ _id: "java"},
{ _id: "js"},
{ _id: "php"},
{ _id: "scala"}
Use the distinct command (reference):
> db.test.distinct("tags")
[ "js", "php", "scala", "actionscript", "android", "java" ]
You could use aggregation if you eventually needed something more complex:
> db.test.aggregate(
{ $project: { tags : 1 } },
{ $unwind : "$tags" },
{ $group : { _id: "$tags" } } );
Results:
[
{
"_id" : "java"
},
{
"_id" : "actionscript"
},
{
"_id" : "android"
},
{
"_id" : "scala"
},
{
"_id" : "php"
},
{
"_id" : "js"
}
]
I'd use $project (reference) to reduce the number of fields being passed through the pipeline though. In the example above, I've used $project to include only the tags for example.