How to change the type of field in an array of sub-documents - mongodb

I have a collection of documents, each of which possesses an array of subdocuments (ranging from 1-10,000 objects). In a small portion of these documents, a field in the arrayed sub-documents has been set to a string instead of an integer, and I need to convert these values to integers.
Here is a structural sample. Note that the DB Admin built the database and collection names with a '.' notation which has complicated some of my work thus far:
Collection Name: "staff.roster"
{
"_id" : ObjectId("5f11d4c28663f32e940696e0"),
"PdfId" : NumberInt(100),
"Staff" : [
{
"StaffId" : NumberInt(1),
"StaffName" : "John Doe"
},
{
"StaffId" : NumberInt(2),
"StaffName" : "John Smith"
},
{
"StaffId" : "3",
"StaffName" : "John Jones"
}
]
}
{
"_id" : ObjectId("5f11d4c28663f32e940696e1"),
"PdfId" : NumberInt(110),
"Staff" : [
{
"StaffId" : "4",
"StaffName" : "Bob Loblaw"
},
{
"StaffId" : NumberInt(5),
"StaffName" : "Edward Nigma"
},
{
"StaffId" : "6",
"StaffName" : "Hugh Mongus"
}
]
}
I have tried a variety of methods without success. Based on other posts, I thought something like this should work but I've generated nothing but errors:
db.getCollection("staff.roster").update(
{},
[{ $set: { "Staff.$[elem].StaffId": { $toInt: "$Staff.$[elem].StaffId" } } }],
{ "arrayFilters": [{ "elem.StaffId": { $type: 2 } } ], "multi": true }
)
ERROR MESSAGE:
WriteResult({
"nMatched" : 0,
"nUpserted" : 0,
"nModified" : 0,
"writeError" : {
"code" : 9,
"errmsg" : "arrayFilters may not be specified for pipeline-syle updates"
}
})
I've also tried this but I believe my notation is wrong because of the sub-documents:
db.getCollection("staff.roster").find( { "Staff.StaffId" : { $type : 2 } } ).forEach( function (x) {
x."Staff.StaffId" = new NumberInt(x."Staff.StaffId");
db.getCollection("staff.roster").save(x);
});
My output should look like this:
{
"_id" : ObjectId("5f11d4c28663f32e940696e0"),
"PdfId" : NumberInt(100),
"Staff" : [
{
"StaffId" : NumberInt(1),
"StaffName" : "John Doe"
},
{
"StaffId" : NumberInt(2),
"StaffName" : "John Smith"
},
{
"StaffId" : NumberInt(3),
"StaffName" : "John Jones"
}
]
}
{
"_id" : ObjectId("5f11d4c28663f32e940696e1"),
"PdfId" : NumberInt(110),
"Staff" : [
{
"StaffId" : NumberInt(4),
"StaffName" : "Bob Loblaw"
},
{
"StaffId" : NumberInt(5),
"StaffName" : "Edward Nigma"
},
{
"StaffId" : NumberInt(6),
"StaffName" : "Hugh Mongus"
}
]
}

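Starting from MongoDB 4.2, you can use update with an aggregation pipeline.
Use $map to iterate over the Staff array, convert StaffId with $toInt, and merge the converted field back into each element using $mergeObjects. Note that update() modifies only the first matching document by default; pass { multi: true } as the third argument (or use updateMany()) to convert every document.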
db.getCollection("staff.roster").update({},
[{
$set: {
Staff: {
$map: {
input: "$Staff",
in: {
$mergeObjects: [
"$$this",
{ StaffId: { $toInt: "$$this.StaffId" } }
]
}
}
}
}
}]
)
Playground

Related

match element in the array with aggregation

I have a MongoDB collection with the following structure:
{
{
"_id" : ObjectId("63e37afe7a3453d5014c011b"),
"schemaVersion" : NumberInt(1),
"Id" : "ObjectId("63e37afe7a3453d5014c0112")",
"Id1" : "ObjectId("63e37afe7a3453d5014c0113")",
"Id2" : "ObjectId("63e37afe7a3453d5014c0114")",
"collectionName" : "Country",
"List" : [
{
"countryId" : NumberInt(1),
"name" : "Afghanistan",
},{
"countryId" : NumberInt(1),
"name" : "India",
},
{
"countryId" : NumberInt(1),
"name" : "USA",
}
}
I need to match on Id, Id1, Id2, collectionName and the name inside List in order to get the countryId. For example, if I match on the values below:
"Id" : "ObjectId("63e37afe7a3453d5014c0112")",
"Id1" : "ObjectId("63e37afe7a3453d5014c0113")",
"Id2" : "ObjectId("63e37afe7a3453d5014c0114")",
"collectionName" : "Country",
"name" : "Afghanistan",
i need result
{
"countryId" : 1,
"name" : "Afghanistan",
}
i tried like below
db.country_admin.aggregate([
{ $match: { collectionName: "Country" } },
{ $unwind : '$countryList' },
{ $project : { _id : 0, 'countryList.name' : 1, 'countryList.countryId' : 1 } }
]).pretty()
and i have following output
[
{
"List" : {
"countryId" : 1.0,
"name" : "Afghanistan"
}
},
{
"List" : {
"countryId" : 2.0,
"name" : "india"
}
},
{
"List" : {
"countryId" : 3.0,
"name" : "USA"
}
}]
You can try using $filter to avoid $unwind like this example:
First $match by your desired condition(s).
Then $filter and get the first element (as "List.name": "Afghanistan" is used in the $match stage, there will be at least one result).
And output only values you want using $project.
db.collection.aggregate([
{
"$match": {
"Id": ObjectId("63e37afe7a3453d5014c0112"),
"Id1": ObjectId("63e37afe7a3453d5014c0113"),
"Id2": ObjectId("63e37afe7a3453d5014c0114"),
"collectionName": "Country",
"List.name": "Afghanistan",
}
},
{
"$project": {
"country": {
"$arrayElemAt": [
{
"$filter": {
"input": "$List",
"cond": {
"$eq": [
"$$this.name",
"Afghanistan"
]
}
}
},
0
]
}
}
},
{
"$project": {
"_id": 0,
"countryId": "$country.countryId",
"name": "$country.name"
}
}
])
Example here
By the way, using $unwind is also possible and you can check this example

How to multiple push to nested array

I have the following object:
{
"_id" : ObjectId("5d7052a3807ab14e286ba5bd"),
"companyBases" : [
{
"vehicles" : [],
"_id" : ObjectId("5d7052a3807ab14e286ba5b0"),
"name" : "Tech Parking 3",
"location" : {
"lng" : 50.01744,
"lat" : 20.033522
},
"country" : ObjectId("5d7052a2807ab14e286ba578"),
"__v" : 0
},
{
"vehicles" : [],
"_id" : ObjectId("5d7052a3807ab14e286ba5af"),
"name" : "Tech Parking 2",
"location" : {
"lng" : 50.036017,
"lat" : 20.086752
},
"country" : ObjectId("5d7052a2807ab14e286ba578"),
"__v" : 0
}
],
"nameOfCompany" : "Transport Tech Service 2 ",
"plan" : {
"name" : "Enterprise",
"vehicles" : 56,
"companyBases" : 10,
"users" : 10,
"price" : 1200
},
"__v" : 0
}
I've tried to do something like this:
db.companies.update(
{
_id: ObjectId("5d7052a3807ab14e286ba5bd")
},
{
$push: {
"companyBases.$[filter1].vehicles": {
"name": "Truck 1",
"combustion": 28
},
"companyBases.$[filter2].vehicles": {
"name": "Truck 2",
"combustion": 28
}
}
},
{
"arrayFilters": [
{
"filter1._id": "5d7052a3807ab14e286ba5b0"
},
{
"filter2._id": "5d7052a3807ab14e286ba5af"
}
]
}
)
But, it doesn't update my nested arrays "vehicles"
It returns me:
WriteResult({ "nMatched" : 1, "nUpserted" : 0, "nModified" : 0 })
I checked the IDs and they are correct. I created a similar question a few days ago, but with a $set pipeline rather than $push - How to update in one query, multiple times without sharing to simple queries? - and I was thinking it would be possible to rewrite that example to use $push.
Issue: In array filters, the _id is matched with string instead of ObjectId
The following query would precisely update the collection:
db.companies.update(
{
_id: ObjectId("5d7052a3807ab14e286ba5bd")
},
{
$push: {
"companyBases.$[filter1].vehicles": {
"name": "Truck 1",
"combustion": 28
},
"companyBases.$[filter2].vehicles": {
"name": "Truck 2",
"combustion": 28
}
}
},
{
"arrayFilters": [{
"filter1._id": ObjectId("5d7052a3807ab14e286ba5b0")
},
{
"filter2._id": ObjectId("5d7052a3807ab14e286ba5af")
}
]
}
)

Match documents with their inner array element variables in MongoDB

I can't understand how to compare a document variable to another document variable. My goal is to match all Authors who have at least one book written in their mothertongue (native language).
However, after unwinding the books array, my $match: { mothertongue: "$bookLang" } stage doesn't return anything, even though the two values are the same in the $project stage.
Can you help me without javascript?
This is my current query:
db.author.aggregate([
{
$unwind: "$books"
},
{
$project: {
books: true,
mothertongue: true,
bookLang: "$books.lang"
}
},
{
$match: { mothertongue: "$bookLang"}
}
])
And here is a sample of the dataset
{
"_id" : ObjectId("5aa7b34a338571a7470be0eb"),
"fname" : "Minna",
"lname" : "Canth",
"mothertongue" : "Finnish",
"birthdate" : ISODate("1844-03-19T00:00:00Z"),
"deathdate" : ISODate("1897-05-12T00:00:00Z"),
"books" : [
{
"title" : "Anna Liisa",
"lang" : "Finnish",
"language" : "finnish",
"edition" : 1,
"cover" : "Hard",
"year" : 1895,
"categorytags" : [
"Finland"
],
"publisher" : [
{
"name" : "Tammi",
"pubId" : ObjectId("5aa7b34a338571a7470be0e4")
}
]
},
{
"title" : "The Burglary and The House of Roinila",
"lang" : "English (UK)",
"translator" : ObjectId("5aa7b34a338571a7470be0ee"),
"cover" : "Soft",
"year" : 2010,
"categorytags" : [
"Finland"
],
"publisher" : [
{
"name" : "Jonathan Cape",
"pubId" : ObjectId("5aa7b34a338571a7470be0e7")
}
]
},
{
"title" : "Anna Liisa 2 ed.",
"lang" : "Finnish",
"language" : "finnish",
"edition" : 2,
"cover" : "hard",
"year" : 1958,
"categorytags" : [
"Finland"
],
"publisher" : [
{
"name" : "Otava",
"pubId" : ObjectId("5aa7b34a338571a7470be0e9")
}
]
}
]
}
End goal. note I'm not interested in formatting just yet, just the filtering
{
"Author" : "Charles Bukowski",
"BooksInMothertongue" : [
"Love Is a Dog from Hell"
]
}
{
"Author" : "Minna Canth",
"BooksInMothertongue" : [
"Anna Liisa",
"Anna Liisa 2 ed."
]
}
...
Try this
db.author.aggregate([{
$match: {
books: {
$ne: []
}
}
},
{
$project: {
books: {
$filter: {
input: "$books",
as: "book",
cond: {
$eq: ["$$book.lang", "$mothertongue"]
}
}
},
fname: 1
}
}, {
$unwind: "$books"
},
{
$group: {
_id: "$_id",
Author: {
$first: '$fname'
},
BooksInMothertongue: {
$push: "$books.title"
}
}
}
])

Mongodb : get whether a document is the latest with a field value and filter on the result

I am trying to port an existing SQL schema into Mongo.
We have document tables, with sometimes several times the same document, with a different revision but the same reference. I want to get only the latest revisions of the documents.
A sample input data:
{
"Uid" : "xxx",
"status" : "ACCEPTED",
"reference" : "DOC305",
"code" : "305-D",
"title" : "Document 305",
"creationdate" : ISODate("2011-11-24T15:13:28.887Z"),
"creator" : "X"
},
{
"Uid" : "xxx",
"status" : "COMMENTED",
"reference" : "DOC306",
"code" : "306-A",
"title" : "Document 306",
"creationdate" : ISODate("2011-11-28T07:23:18.807Z"),
"creator" : "X"
},
{
"Uid" : "xxx",
"status" : "COMMENTED",
"reference" : "DOC306",
"code" : "306-B",
"title" : "Document 306",
"creationdate" : ISODate("2011-11-28T07:26:49.447Z"),
"creator" : "X"
},
{
"Uid" : "xxx",
"status" : "ACCEPTED",
"reference" : "DOC501",
"code" : "501-A",
"title" : "Document 501",
"creationdate" : ISODate("2011-11-19T06:30:35.757Z"),
"creator" : "X"
},
{
"Uid" : "xxx",
"status" : "ACCEPTED",
"reference" : "DOC501",
"code" : "501-B",
"title" : "Document 501",
"creationdate" : ISODate("2011-11-19T06:40:32.957Z"),
"creator" : "X"
}
Given this data, I want this result set (sometimes I want only the last revision, sometimes I want all revisions with an attribute telling me whether it's the latest):
{
"Uid" : "xxx",
"status" : "ACCEPTED",
"reference" : "DOC305",
"code" : "305-D",
"title" : "Document 305",
"creationdate" : ISODate("2011-11-24T15:13:28.887Z"),
"creator" : "X",
"lastrev" : true
},
{
"Uid" : "xxx",
"status" : "COMMENTED",
"reference" : "DOC306",
"code" : "306-B",
"title" : "Document 306",
"creationdate" : ISODate("2011-11-28T07:26:49.447Z"),
"creator" : "X",
"lastrev" : true
},
{
"Uid" : "xxx",
"status" : "ACCEPTED",
"reference" : "DOC501",
"code" : "501-B",
"title" : "Document 501",
"creationdate" : ISODate("2011-11-19T06:40:32.957Z"),
"creator" : "X",
"lastrev" : true
}
I already have a bunch of filters, sorting, and skip/limit (for pagination of data), so the final result set should be mindful of these constraints.
The current "find" query (built with the .Net driver), which filters fine but gives me all revisions of each document:
coll.find(
{ "$and" : [
{ "$or" : [
{ "deletedid" : { "$exists" : false } },
{ "deletedid" : null }
] },
{ "$or" : [
{ "taskid" : { "$exists" : false } },
{ "taskid" : null }
] },
{ "objecttypeuid" : { "$in" : ["xxxxx"] } }
] },
{ "_id" : 0, "Uid" : 1, "lastrev" : 1, "title" : 1, "code" : 1, "creator" : 1, "owner" : 1, "modificator" : 1, "status" : 1, "reference": 1, "creationdate": 1 }
).sort({ "creationdate" : 1 }).skip(0).limit(10);
Using another question, I have been able to build this aggregation, which gives me the latest revision of each document, but with not enough attributes in the result:
coll.aggregate([
{ $sort: { "creationdate": 1 } },
{
$group: {
"_id": "$reference",
result: { $last: "$creationdate" },
creationdate: { $last: "$creationdate" }
}
}
]);
I would like to integrate the aggregation with the find query.
I have found the way to mix aggregation and filtering:
coll.aggregate(
[
{ $match: {
"$and" : [
{ "$or" : [
{ "deletedid" : { "$exists" : false } },
{ "deletedid" : null }
] },
{ "$or" : [
{ "taskid" : { "$exists" : false } },
{ "taskid" : null }
] },
{ "objecttypeuid" : { "$in" : ["xxx"] } }
]
}
},
{ $sort: { "creationdate": 1 } },
{ $group: {
"_id": "$reference",
"doc": { "$last": "$$ROOT" }
}
},
{ $sort: { "doc.creationdate": 1 } },
{ $skip: skip },
{ $limit: limit }
],
{ allowDiskUse: true }
);
For each result node, this gives me a "doc" node with the document data. It has too much data still (it's missing projections), but it's a start.
Translated in .Net:
FilterDefinitionBuilder<BsonDocument> filterBuilder = Builders<BsonDocument>.Filter;
FilterDefinition<BsonDocument> filters = filterBuilder.Empty;
filters = filters & (filterBuilder.Not(filterBuilder.Exists("deletedid")) | filterBuilder.Eq("deletedid", BsonNull.Value));
filters = filters & (filterBuilder.Not(filterBuilder.Exists("taskid")) | filterBuilder.Eq("taskid", BsonNull.Value));
foreach (var f in fieldFilters) {
filters = filters & filterBuilder.In(f.Key, f.Value);
}
var sort = Builders<BsonDocument>.Sort.Ascending(orderby);
var group = new BsonDocument {
{ "_id", "$reference" },
{ "doc", new BsonDocument("$last", "$$ROOT") }
};
var aggregate = coll.Aggregate(new AggregateOptions { AllowDiskUse = true })
.Match(filters)
.Sort(sort)
.Group(group)
.Sort(sort)
.Skip(skip)
.Limit(rows);
return aggregate.ToList();
I'm pretty sure there are better ways to do this, though.
Your answer is pretty close. Instead of $last, $max is better.
About $last operator:
Returns the value that results from applying an expression to the last document in a group of documents that share the same group by a field. Only meaningful when documents are in a defined order.
Get the last revision in each group, see code below in mongo shell:
db.collection.aggregate([
{
$group: {
_id: '$reference',
doc: {
$max: {
"creationdate" : "$creationdate",
"code" : "$code",
"Uid" : "$Uid",
"status" : "$status",
"title" : "$title",
"creator" : "$creator"
}
}
}
},
{
$project: {
_id: 0,
Uid: "$doc.Uid",
status: "$doc.status",
reference: "$_id",
code: "$doc.code",
title: "$doc.title",
creationdate: "$doc.creationdate",
creator: "$doc.creator"
}
}
]).pretty()
The output is as you expect:
{
"Uid" : "xxx",
"status" : "ACCEPTED",
"reference" : "DOC501",
"code" : "501-B",
"title" : "Document 501",
"creationdate" : ISODate("2011-11-19T06:40:32.957Z"),
"creator" : "X"
}
{
"Uid" : "xxx",
"status" : "COMMENTED",
"reference" : "DOC306",
"code" : "306-B",
"title" : "Document 306",
"creationdate" : ISODate("2011-11-28T07:26:49.447Z"),
"creator" : "X"
}
{
"Uid" : "xxx",
"status" : "ACCEPTED",
"reference" : "DOC305",
"code" : "305-D",
"title" : "Document 305",
"creationdate" : ISODate("2011-11-24T15:13:28.887Z"),
"creator" : "X"
}

mongodb aggregation $group and then $push a object

this is my data :
> db.bookmarks.find({"userId" : "56b9b74bf976ab70ff6b9999"}).pretty()
{
"_id" : ObjectId("56c2210fee4a33579f4202dd"),
"userId" : "56b9b74bf976ab70ff6b9999",
"items" : [
{
"itemId" : "28",
"timestamp" : "2016-02-12T18:07:28Z"
},
{
"itemId" : "29",
"timestamp" : "2016-02-12T18:07:29Z"
},
{
"itemId" : "30",
"timestamp" : "2016-02-12T18:07:30Z"
},
{
"itemId" : "31",
"timestamp" : "2016-02-12T18:07:31Z"
},
{
"itemId" : "32",
"timestamp" : "2016-02-12T18:07:32Z"
},
{
"itemId" : "33",
"timestamp" : "2016-02-12T18:07:33Z"
},
{
"itemId" : "34",
"timestamp" : "2016-02-12T18:07:34Z"
}
]
}
I want to have something like (actually i hope the _id can become userId too) :
{
"_id" : "56b9b74bf976ab70ff6b9999",
"items" : [
{ "itemId": "32", "timestamp": "2016-02-12T18:07:32Z" },
{ "itemId": "31", "timestamp": "2016-02-12T18:07:31Z" },
{ "itemId": "30", "timestamp": "2016-02-12T18:07:30Z" }
]
}
What I have now :
> db.bookmarks.aggregate(
... { $match: { "userId" : "56b9b74bf976ab70ff6b9999" } },
... { $unwind: '$items' },
... { $sort: { 'items.timestamp': -1} },
... { $skip: 2 },
... { $limit: 3},
... { $group: { '_id': '$userId' , items: { $push: '$items.itemId' } } }
... ).pretty()
{ "_id" : "56b9b74bf976ab70ff6b9999", "items" : [ "32", "31", "30" ] }
I tried reading the MongoDB documentation and found out that I can use $push, but somehow I cannot find a way to push such an object, which is not defined anywhere in the whole document. I want to have the timestamp as well, but I don't know how I should modify the $group stage (or others?) to do so. Thanks for helping!
This code, which I tested in the MongoDB 3.2.1 shell, should give you the output format that you want:
> db.bookmarks.aggregate(
{ "$match" : { "userId" : "Ursula" } },
{ "$unwind" : "$items" },
{ "$sort" : { "items.timestamp" : -1 } },
{ "$skip" : 2 },
{ "$limit" : 3 },
{ "$group" : { "_id" : "$userId", items: { "$push" : { "myPlace" : "$items.itemId", "myStamp" : "$items.timestamp" } } } } ).pretty()
Running the above will produce this output:
{
"_id" : "Ursula",
"items" : [
{
"myPlace" : "52",
"myStamp" : ISODate("2016-02-13T18:07:32Z")
},
{
"myPlace" : "51",
"myStamp" : ISODate("2016-02-13T18:07:31Z")
},
{
"myPlace" : "50",
"myStamp" : ISODate("2016-02-13T18:07:30Z")
}
]
}
In MongoDB version 3.2.x, you can also use the $out operator in the very last stage of the aggregation pipeline, and have the output of the aggregation query written to a collection. Here is the code I used:
> db.bookmarks.aggregate(
{ "$match" : { "userId" : "Ursula" } },
{ "$unwind" : "$items" },
{ "$sort" : { "items.timestamp" : -1 } },
{ "$skip" : 2 },
{ "$limit" : 3 },
{ "$group" : { "_id" : "$userId", items: { "$push" : { "myPlace" : "$items.itemId", "myStamp" : "$items.timestamp" } } } },
{ "$out" : "ursula" } )
This gives me a collection named "ursula":
> show collections
ursula
and I can query that collection:
> db.ursula.find().pretty()
{
"_id" : "Ursula",
"items" : [
{
"myPlace" : "52",
"myStamp" : ISODate("2016-02-13T18:07:32Z")
},
{
"myPlace" : "51",
"myStamp" : ISODate("2016-02-13T18:07:31Z")
},
{
"myPlace" : "50",
"myStamp" : ISODate("2016-02-13T18:07:30Z")
}
]
}
>
Last of all, this is the input document I used in the aggregation query. You can compare this document to how I coded the aggregation query to see how I built the new items array.
> db.bookmarks.find( { "userId" : "Ursula" } ).pretty()
{
"_id" : ObjectId("56c240ed55f2f6004dc3b25c"),
"userId" : "Ursula",
"items" : [
{
"itemId" : "48",
"timestamp" : ISODate("2016-02-13T18:07:28Z")
},
{
"itemId" : "49",
"timestamp" : ISODate("2016-02-13T18:07:29Z")
},
{
"itemId" : "50",
"timestamp" : ISODate("2016-02-13T18:07:30Z")
},
{
"itemId" : "51",
"timestamp" : ISODate("2016-02-13T18:07:31Z")
},
{
"itemId" : "52",
"timestamp" : ISODate("2016-02-13T18:07:32Z")
},
{
"itemId" : "53",
"timestamp" : ISODate("2016-02-13T18:07:33Z")
},
{
"itemId" : "54",
"timestamp" : ISODate("2016-02-13T18:07:34Z")
}
]
}