Creating an embedded document by getting data from another sub document - mongodb

I have a document which looks like,
org: {
"name": "tera",
"orgLocation": {
"street":"xyz",
"postal Code": "45893",
"latitude": "64.23456",
"longitude": "62.75469"
}
}
now I want to make latitude and longitude as a separate sub document keeping the so that the document looks like:
org: {
"name":"tera",
"orgLocation": {
"street":"xyz",
"postal Code":"45893",
"latitude":"64.23456",
"longitude":"62.75469"
},
orgGeo: {
"latitude":"64.23456",
"longitude":"62.75469"
}
}
As am new to mongoDB am unable to figure out how to?
Can any one help me in this.

If you want to update your document using values of existing fields in the same document you need to loop over each document using the .forEach method and update each document with "Bulk" operations for maximum efficiency.
var bulk = bulk = db.test.initializeOrderedBulkOp(),
count = 0;
db.test.find({ "org": { "$exists": true } }).forEach(function(doc) {
var latitude = doc["org"]["orgLocation"]["latitude"],
longitude = doc["org"]["orgLocation"]["longitude"];
bulk.find({ "_id": doc._id }).update({
"$set": {
"org.orgGeo.latitude": latitude,
"org.orgGeo.longitude": longitude
}
});
count++;
if (count % 100 == 0) {
// Execute per 100 operations and re-init.
bulk.execute();
bulk = db.test.initializeOrderedBulkOp();
}
})
// Clean up queues
if (count % 100 != 0)
bulk.execute();
After running this query your documents will look like this:
{
"_id" : ObjectId("55fd008f6606c68eb1d64934"),
"org" : {
"name" : "tera",
"orgLocation" : {
"street" : "xyz",
"postal Code" : "45893",
"latitude" : "64.23456",
"longitude" : "62.75469"
},
"orgGeo" : {
"latitude" : "64.23456",
"longitude" : "62.75469"
}
}
}

Related

how to query for exact mach in unknown number of subfields in mongodb

I have a collection where documents can have an unknown number of sub documents:
"agent_id": {
"0":"1234",
"1":"2234",...etc
How do I search for an exact match in all the agent_id sub-fields?
You need to dynamically create an object with properties that are a concatenation of the embedded document name agent_id with the dot (.) and the field name, enclosed in quotes, something like this:
var query = {
"agent_id.0": "78343",
"agent_id.1": "78343",
"agent_id.2": "78343",
"agent_id.3": "78343",
...
"agent_id.n": "78343"
}
One way to create the object is generate the sub-documents keys with mapReduce. The following demonstrates this approach. In the Map-Reduce operation, an array of keys in the agent_id subdocument is generated to an output collection "collection_keys" and then used to produce the find() query expression:
Suppose you populate a sample collection
db.collection.insert([
{
"agent_id": {
"0":"1234",
"1":"2234",
"56":"8451",
"74":"1475",
"10":"1234"
}
},
{
"agent_id": {
"5":"5874",
"18":"2351"
}
}
])
Running the following mapReduce operation
var mr = db.runCommand({
"mapreduce" : "collection",
"map" : function() {
for (var key in this.agent_id) { emit(key, null); }
},
"reduce" : function(key, stuff) {
return null
},
"out": "collection" + "_keys"
});
var query = { "$or": [] },
value = "1234";
db[mr.result].distinct("_id").forEach(function (key){
var obj = {};
obj["agent_id." + key] = value;
query["$or"].push(obj)
});
printjson(query);
will produce:
{
"$or" : [
{
"agent_id.0" : "1234"
},
{
"agent_id.1" : "1234"
},
{
"agent_id.10" : "1234"
},
{
"agent_id.18" : "1234"
},
{
"agent_id.5" : "1234"
},
{
"agent_id.56" : "1234"
},
{
"agent_id.74" : "1234"
}
]
})
You can then use the query document in your find() query:
db.collection.find(query)
which will produce the result:
/* 0 */
{
"_id" : ObjectId("561d5312cd05efc95a1ea1f4"),
"agent_id" : {
"0" : "1234",
"1" : "2234",
"56" : "8451",
"74" : "1475",
"10" : "1234"
}
}

How do I maintain data types when copying document?

I need to make a change to use a generated ObjectId instead of String I was using but the field data type changes from Int to Double.
For example say we have a document
{_id: "Product Name", count: 415 }
Now I want to create a document
{_id: "some object id", name: "Product Name", count: 415 }
I am using similar code below but it makes the count a Double.
var cursor = db.products.find()
cursor.forEach(function(item)
{
var old_id= item._id;
item.name = old_id;
delete item._id;
db.products.insert(item);
db.products.remove({_id:old_id});
});
I can add this in the loop: item.count = NumberInt( item.count) to make sure it's an Int but
I really don't want to do this for each field that I have.
Is there anyway to do this without manually having to cast them? I don't understand why it takes an Int and turns it into a Double. I know Double is the default but the fields that I am working with are already Integers.
Well if I understand you, your documents look like this:
{ "_id" : "Apple", "count" : 187 }
{ "_id" : "Google", "count" : 123 }
{ "_id" : "Amazon", "count" : 325 }
{ "_id" : "Oracle", "count" : 566 }
You can use the Bulk Api to update your collection.
var bulk = db.collection.initializeUnorderedBulkOp();
Var count = 0;
db.collection.aggregate([{ $project: { '_id': 0, 'name': '$_id', 'count': 1 }}]).forEach(function(doc){
bulk.find({'_id': doc.name}).remove();
bulk.insert(doc);
count++;
if (count % 1000 == 0){
// Execute per 1000 operations and re-init.
bulk.execute();
bulk = db.collection.initializeUnorderedBulkOp();
}})
// Clean up queues
if (count % 1000 != 0){
bulk.execute();
}
Then:
db.collection.find()
Yields the following documents:
{ "_id" : ObjectId("55a7e2c7eb68594275546c7c"), "count" : 187, "name" : "Apple" }
{ "_id" : ObjectId("55a7e2c7eb68594275546c7d"), "count" : 123, "name" : "Google" }
{ "_id" : ObjectId("55a7e2c7eb68594275546c7e"), "count" : 325, "name" : "Amazon" }
{ "_id" : ObjectId("55a7e2c7eb68594275546c7f"), "count" : 566, "name" : "Oracle" }
Is there anyway to do this without manually having to cast them? I don't understand why it takes an Int and turns it into a Double. I know Double is the default but the fields that I am working with are already Integers.
You really don't need to worry about that if you are using the shell but as pointed out in the comment you can always use a language with native support for integers to preserve the data type.

Compare array elements,remove the one with the lowest score

There are 200 documents in school db. I must remove each document which has "type":"homework" and the lowest score.
{
"_id" : 0,
"name" : "aimee Zank",
"scores" :
[
{
"type" : "exam",
"score" : 1.463179736705023
},
{
"type" : "quiz",
"score" : 11.78273309957772
},
{
"type" : "homework",
"score" : 6.676176060654615
},
{
"type" : "homework",
"score" : 35.8740349954354
}
]
}
For example,here
{
"type" : "homework",
"score" : 6.676176060654615
}
must be removed as score = 6.6 < 35.8
I sorted all the documents like this:
db.students.find({"scores.type":"homework"}).sort({"scores.score":1})
But I do not know how then to remove the doc having the lowest score and type:homework???
NOTE: how to solve it by not using aggregation method? E.g., by sorting and then updating.
This can be done in a couple of steps. The first step is to grab a list of the documents with the minimum score by using the aggregation framework with $match, $unwind and $group operators that streamlines your documents to find the minimum score for each document:
lowest_scores_docs = db.school.aggregate([
{ "$match": {"scores.type": "homework"} },
{ "$unwind": "$scores" }, { "$match": {"scores.type": "homework"} },
{ "$group": { "_id":"$_id", "lowest_score": {"$min": "$scores.score" } } } ] )
The second step is to loop through the dictionary above and use the $pull operator in the update query to remove the element from the array as follows:
for result in lowest_scores_docs["result"]:
db.school.update({ "_id": result["_id"] },
{ "$pull": { "scores": { "score": result["lowest_score"] } } } )
import pymongo
import sys
# connnecto to the db on standard port
connection = pymongo.MongoClient("mongodb://localhost")
db = connection.school # attach to db
students = db.students # specify the colllection
try:
cursor = students.find({})
print(type(cursor))
for doc in cursor:
hw_scores = []
for item in doc["scores"]:
if item["type"] == "homework":
hw_scores.append(item["score"])
hw_scores.sort()
hw_min = hw_scores[0]
#students.update({"_id": doc["_id"]},
# {"$pull":{"scores":{"score":hw_min}}})
except:
print ("Error trying to read collection:" + sys.exc_info()[0])

Is it possible to retrieve a 'time span' from a MongoDB query, using the timestamp within an ObjectId?

We have a basic enquiry management tool that we're using to track some website enquiries in our administration suite, and we're using the ObjectId of each document in our enquiries collection to sort the enquiries by the date they were added.
{
"_id" : ObjectId("53a007db144ff47be1000003"),
"comments" : "This is a test enquiry. Please ignore. We'll delete it shortly.",
"customer" : {
"name" : "Test Enquiry",
"email" : "test#test.com",
"telephone" : "07890123456",
"mobile" : "07890123456",
"quote" : false,
"valuation" : false
},
"site" : [],
"test" : true,
"updates" : [
{
"_id" : ObjectId("53a007db144ff47be1000001"),
"status" : "New",
"status_id" : ObjectId("537de7c3a5e6e668ffc2335c"),
"status_index" : 100,
"substatus" : "New Web Enquiry",
"substatus_id" : ObjectId("5396bb9fa5e6e668ffc23388"),
"notes" : "New enquiry received from website.",
},
{
"_id" : ObjectId("53a80c977d299cfe91bacf81"),
"status" : "New",
"status_id" : ObjectId("537de7c3a5e6e668ffc2335c"),
"status_index" : 100,
"substatus" : "Attempted Contact",
"substatus_id" : ObjectId("53a80e06a5e6e668ffc2339e"),
"notes" : "In this test, we pretend that we've not managed to get hold of the customer on the first attempt.",
},
{
"_id" : ObjectId("53a80e539b966b8da5c40c36"),
"status" : "Approved",
"status_id" : ObjectId("52e77a49d85e95f00ebf6c72"),
"status_index" : 200,
"substatus" : "Enquiry Confirmed",
"substatus_id" : ObjectId("53901f1ba5e6e668ffc23372"),
"notes" : "In this test, we pretend that we've got hold of the customer after failing to contact them on the first attempt.",
}
]
}
Within each enquiry is an updates array of objects which also have an ObjectId as their main identity field. We're using an $unwind and $group aggregation to pull the first and latest updates, as well as the count of updates, making sure we only take enquiries where there have been more than one update (as one is automatically inserted when the enquiry is made):
db.enquiries.aggregate([
{
$match: {
"test": true
}
},
{
$unwind: "$updates"
},
{
$group: {
"_id": "$_id",
"latest_update_id": {
$last: "$updates._id"
},
"first_update_id": {
$first: "$updates._id"
},
"update_count": {
$sum: 1
}
}
},
{
$match: {
"update_count": {
$gt: 1
}
}
}
])
This results in the following output:
{
"result" : [
{
"_id" : ObjectId("53a295ad122ea80200000005"),
"latest_update_id" : ObjectId("53a80bdc7d299cfe91bacf7e"),
"first_update_id" : ObjectId("53a295ad122ea80200000003"),
"update_count" : 2
},
{
"_id" : ObjectId("53a007db144ff47be1000003"),
"latest_update_id" : ObjectId("53a80e539b966b8da5c40c36"),
"first_update_id" : ObjectId("53a007db144ff47be1000001"),
"update_count" : 3
}
],
"ok" : 1
}
This is then passed through to our code (node.js, in this case) where we perform a few operations on it and then present some information on our dashboard.
Ideally, I'd like to add another $group pipeline aggregation to the query which would subtract the timestamp of first_update_id from the timestamp of latest_update_id to give us a timespan, which we could then use $avg on.
Can anyone tell me if this is possible? (Thank you!)
As Neil already pointed out, you can't get to the timestamp from the ObjectId in the aggregation framework.
You said that speed is not important, so using MapReduce you can get what you want:
var map = function() {
if (this.updates.length > 1) {
var first = this.updates[0];
var last = this.updates[this.updates.length - 1];
var diff = last._id.getTimestamp() - first._id.getTimestamp();
var val = {
latest_update_id : last._id,
first_update_id : first._id,
update_count : this.updates.length,
diff: diff
}
emit(this._id, val);
}
};
var reduce = function() { };
db.runCommand(
{
mapReduce: "enquiries",
map: map,
reduce: reduce,
out: "mrresults",
query: { test : true}
}
);
This are the results:
{
"_id" : ObjectId("53a007db144ff47be1000003"),
"value" : {
"latest_update_id" : ObjectId("53a80e539b966b8da5c40c36"),
"first_update_id" : ObjectId("53a007db144ff47be1000001"),
"update_count" : 3,
"diff" : 525944000
}
}
Edit:
If you want to get the average diff for all documents you can do it like this:
var map = function() {
if (this.updates.length > 1) {
var first = this.updates[0];
var last = this.updates[this.updates.length - 1];
var diff = last._id.getTimestamp() - first._id.getTimestamp();
emit("1", {diff : diff});
}
};
var reduce = function(key, values) {
var reducedVal = { count: 0, sum: 0 };
for (var idx = 0; idx < values.length; idx++) {
reducedVal.count += 1;
reducedVal.sum += values[idx].diff;
}
return reducedVal;
};
var finalize = function (key, reducedVal) {
reducedVal.avg = reducedVal.sum/reducedVal.count;
return reducedVal;
};
db.runCommand(
{
mapReduce: "y",
map: map,
reduce: reduce,
finalize : finalize,
out: "mrtest",
query: { test : true}
}
);
And the example output:
> db.mrtest.find().pretty()
{
"_id" : "1",
"value" : {
"count" : 2,
"sum" : 1051888000,
"avg" : 525944000
}
}

Search in an array of an embedded object in MongoDB

Given this structure for a school object:
{
"grade_spans" :
{
"0": {
"grade_span_key" : "K_5",
"name": "Elementary School"
},
"1": {
"grade_span_key" : "6_8",
"name": "Junior High-School"
}
}
}
How do I find a school for a given grade_span_key?
db.schools.find({ "grade_span_key": "K_5" })
returns empty.
Update: Sorry, I copied the structure incorrectly. It's actually an Embedded Object not a collection.
Update #2: There was a doctrine2 annotation I was using incorrectly: #MongoDB\EmbedMany(strategy="set"). I change the strategy to pushAll (which is the default)
If this field is just embedded into the main document #sergios answer will work just fine and it is not clear why his query wouldn't work as you don't provide an example of the document structure only of the embedded structure.
Also as #JohnnyHK says, rebuild that object as an array since dynamic keys in this case would be harder.
If you are looking to pick out matching rows from the embedded document and not the full document. This is a little harder but is possible:
db.schools.aggregate({
{$unwind: "$grade_spans"},
{$match: {"grade_spans.grade_span_key": "K_5"}},
{$group: {_id: "$_id", grade_spans: {$push: "$grade_spans"}}}
})
Something like the above should return a document of the structure:
{
_id: {},
grade_spans:[{
"grade_span_key" : "K_5",
"name" : "Elementary School"
}]
}
You should use full path to the property, in dotted notation.
> db.schools.find({"grade_spans.grade_span_key": "K_5"})
{
"_id" : ObjectId("50801cc5ab582e310adc0e41"),
"grade_spans" : [
{
"grade_span_key" : "K_5",
"name" : "Elementary School"
},
{
"grade_span_key" : "6_8",
"name" : "Junior High-School"
}
]
}
Given this structure :
{
"grade_spans" : {
"0": { "grade_span_key" : "K_5",
"name": "Elementary School" },
"1": { "grade_span_key" : "6_8",
"name": "Junior High-School" }
}
}
You can try with map/reduce function :
var mapFunction = function() {
for (var i in this.grade_spans) {
// Add the name of the school in a list
var schools = [];
schools[0] = this.grade_spans[i].name;
// Create out object : { schools : ["Elementary School"] } or { schools : ["Junior High-School"] }
var out = {};
out.schools = schools;
// Create key (K_5 or 6_8)
var key = this.grade_spans[i].grade_span_key;
emit(key, out);
}
};
var reduceFunction = function(key, values) {
var schools = [];
for (var i = 0; i < values.length; i++) {
schools.push.apply(schools, values[i].schools);
}
return {schools:schools};
}
db.schools.mapReduce(
mapFunction,
reduceFunction,
{ out: "map_reduce_grade_spans", sort: {_id:1} }
)
And then :
db.map_reduce_grade_spans.find({_id:"K_5"});