MongoDb - Change type from Int to Double - mongodb

We have a collection that looks like this:
{
"_id" : "10571:6",
"v" : 261355,
"ts" : 4.88387e+008
}
Now, some of the "v" are ints, some are doubles. I want to change them all to doubles.
I've tried a few things but nothing works (v is an int32 for this record, I want to change it to a double):
db.getCollection('VehicleLastValues')
.find
(
{_id : "10572:6"}
)
.forEach
(
function (x)
{
temp = x.v * 1.0;
db.getCollection('VehicleLastValues').save(x);
}}
Things I've tried:
x.v = x.v * 1.1 / 1.1;
x.v = parseFloat (new String(x.v));
But I can't get it to be saved as a double...

By default, all "numbers" are stored as "double" in MongoDB unless explicitly cast otherwise.
Take the following samples:
db.sample.insert({ "a": 1 })
db.sample.insert({ "a": NumberLong(1) })
db.sample.insert({ "a": NumberInt(1) })
db.sample.insert({ "a": 1.223 })
This yields a collection like this:
{ "_id" : ObjectId("559bb1b4a23c8a3da73e0f76"), "a" : 1 }
{ "_id" : ObjectId("559bb1bba23c8a3da73e0f77"), "a" : NumberLong(1) }
{ "_id" : ObjectId("559bb29aa23c8a3da73e0f79"), "a" : 1 }
{ "_id" : ObjectId("559bb30fa23c8a3da73e0f7a"), "a" : 1.223 }
Despite the different constructor functions, note how several of the data points there look much the same. The MongoDB shell itself doesn't always clearly distinguish between them, but there is a way you can tell.
There is of course the $type query operator, which allows selection of BSON Types.
So testing this with Type 1 - Which is "double":
> db.sample.find({ "a": { "$type": 1 } })
{ "_id" : ObjectId("559bb1b4a23c8a3da73e0f76"), "a" : 1 }
{ "_id" : ObjectId("559bb30fa23c8a3da73e0f7a"), "a" : 1.223 }
You see that both the first insert and the last are selected, but of course not the other two.
So now test for BSON Type 16 - which is a 32-bit integer
> db.sample.find({ "a": { "$type": 16 } })
{ "_id" : ObjectId("559bb29aa23c8a3da73e0f79"), "a" : 1 }
That was the "third" insertion which used the NumberInt() function in the shell. So that function and other serialization from your driver can set this specific BSON type.
And for the BSON Type 18 - which is 64-bit integer
> db.sample.find({ "a": { "$type": 18 } })
{ "_id" : ObjectId("559bb1bba23c8a3da73e0f77"), "a" : NumberLong(1) }
The "second" insertion, which was constructed via NumberLong().
If you wanted to "weed out" things that were "not a double" then you would do:
db.sample.find({ "$or": [{ "a": { "$type": 16 } },{ "a": { "$type": 18 } }]})
Which are the only other valid numeric types other than "double" itself.
So to "convert" these in your collection, you can "Bulk" process like this:
// Re-cast every 32-bit / 64-bit integer "a" value in db.sample as a double.
//
// An ORDERED bulk operation is used on purpose: the $rename queued for a
// document must run after the $set/$unset queued for that same document, and
// an unordered bulk operation gives no guarantee about execution order.
var BATCH_SIZE = 1000;
var bulk = db.sample.initializeOrderedBulkOp();
var count = 0;

db.sample.find({
    "$or": [
        { "a": { "$type": 16 } },   // 32-bit integer
        { "a": { "$type": 18 } }    // 64-bit integer
    ]
}).forEach(function(doc) {
    // Steps 1 and 2: copy the value into a temporary field "b" as a plain
    // shell number (stored as a double by default) and drop the old field.
    bulk.find({ "_id": doc._id })
        .updateOne({
            "$set": { "b": doc.a.valueOf() },
            "$unset": { "a": 1 }
        });

    // Step 3: move the temporary field back to the original name.
    bulk.find({ "_id": doc._id })
        .updateOne({ "$rename": { "b": "a" } });

    count++;

    // Flush to the server every BATCH_SIZE documents and start a new batch.
    if (count % BATCH_SIZE === 0) {
        bulk.execute();
        bulk = db.sample.initializeOrderedBulkOp();
    }
});

// Flush whatever is left in the final, partially filled batch.
if (count % BATCH_SIZE !== 0) bulk.execute();
This performs three steps "in bulk":
Re-cast the value to a new field as a "double"
Remove the old field with the unwanted type
Rename the new field to the old field name
This is necessary since the BSON type information is "sticky" to the field element once created. So in order to "re-cast" you need to completely remove the old data which includes the original field assignment.
So that should explain how to "detect" and also "re-cast" unwanted types in your documents.

Related

MongoDB "count" using "$in" becomes too slow

I have a big collection (~30millions of records) and I am querying it in different ways, some of them work pretty good:
// Query #1
db.getCollection('my_collection').find({ "parent_uuid": "77796c50-7dc3-0134-21f1-0a81e8b09a82" }).count()
// => 415262 (in less than one second)
// Query #2
db.getCollection('my_collection').find({ "parent_uuid": "35529cc0-330a-0135-3ba3-0a901406a434" }).count()
// => 1 (in less than one second)
But then I run this one and it gets stuck:
// Query #3
db.getCollection('my_collection').find({
"parent_uuid": { "$in": ["77796c50-7dc3-0134-21f1-0a81e8b09a82", "35529cc0-330a-0135-3ba3-0a901406a434"] }
}).count()
Indexes
Among other indexes I have this one:
{
"v" : 1,
"key" : {
"parent_uuid" : 1
},
"name" : "parent_uuid_1",
"ns" : "my_database.my_collection"
}
I need to use the $in option with several UUIDs, what am I doing wrong?
Update 1: Explains
Explain for Query #1: https://pastebin.com/hf33ZWZ8
Explain for Query #2: https://pastebin.com/fNmj8q70
Explain for Query #3: https://pastebin.com/WE3AVJCT
My hacky solution (and it's still not as fast as it should be) is to use aggregate and group, then sum the results:
// Count documents whose parent_uuid is one of the listed values:
// first count per parent_uuid, then add the per-uuid counts together.
db.getCollection('my_collection').aggregate([
    {
        "$match": {
            "parent_uuid": {
                $in: ["77796c50-7dc3-0134-21f1-0a81e8b09a82", "35529cc0-330a-0135-3ba3-0a901406a434"]
            }
        }
    },
    {
        // One intermediate document per parent_uuid with its own count.
        "$group": {
            "_id" : "$parent_uuid",
            "initial_count": { "$sum": 1 }
        }
    },
    {
        // Collapse the per-uuid counts into a single grand total.
        "$group": {
            "_id" : null,
            "count": { "$sum": "$initial_count" }
        }
    }
])
will result in:
// less than a second
/* 1 */
{
"_id" : null,
"count" : 416175.0
}

get from array what's not in mongo [duplicate]

I have a collection of documents which contain unique id field. Now I have a list of ids which may contain some ids that do not exist in the collection. What's the best way to find out those ids from the list?
I know I can use the $in operator to get the documents whose ids are contained in the list and then compare the result with the given id list, but is there a better way to do it?
I suppose you have the following documents in your collection:
{ "_id" : ObjectId("55b725fd7279ca22edb618bb"), "id" : 1 }
{ "_id" : ObjectId("55b725fd7279ca22edb618bc"), "id" : 2 }
{ "_id" : ObjectId("55b725fd7279ca22edb618bd"), "id" : 3 }
{ "_id" : ObjectId("55b725fd7279ca22edb618be"), "id" : 4 }
{ "_id" : ObjectId("55b725fd7279ca22edb618bf"), "id" : 5 }
{ "_id" : ObjectId("55b725fd7279ca22edb618c0"), "id" : 6 }
and the following list of id
var listId = [ 1, 3, 7, 9, 8, 35 ];
We can use the .filter method to return the array of ids that is not in your collection.
// Fetch the distinct ids once up front; calling distinct() inside the filter
// callback would send one query to the server per element of listId.
var existingIds = db.collection.distinct('id');

// Keep only the ids from listId that do not occur in the collection.
var result = listId.filter(function(el) {
    return existingIds.indexOf(el) === -1;
});
This yields
[ 7, 9, 8, 35 ]
Now you can also use the aggregation frameworks and the $setDifference operator.
db.collection.aggregate([
{ "$group": { "_id": null, "ids": { "$addToSet": "$id" }}},
{ "$project" : { "missingIds": { "$setDifference": [ listId, "$ids" ]}, "_id": 0 }}
])
This yields:
{ "missingIds" : [ 7, 9, 8, 35 ] }
Unfortunately MongoDB can only use built in functions (otherwise I'd recommend using a set) but you could try and find all distinct id's in your list then just manually pull them out.
Something like (untested):
var your_unique_ids = ["present", "not_present"];
var present_ids = db.getCollection('your_col').distinct('unique_field', {unique_field: {$in: your_unique_ids}});
for (var i=0; i < your_unique_ids.length; i++) {
var some_id = your_unique_ids[i];
if (present_ids.indexOf(some_id) < 0) {
print(some_id);
}
}
Below query will fetch you the result :
var listid = [1,2,3,4];
db.collection.aggregate([
{$project: { uniqueId :
{
"$setDifference":
[ listid , db.collection.distinct( "unique_field" )]} , _id : 0 }
},
{$limit:1}
]);

How to Avoid Duplicate Entries in MongoDb Meteor App

How to avoid duplicate entries in mongoDb in Meteor application.
On the command: db.products.find({},{"TEMPLATE_NAME": 1},{unique : true})
{ "_id" : ObjectId("5555d0a16ce3b01bb759a771"), "TEMPLATE_NAME" : "B" }
{ "_id" : ObjectId("5555d0b46ce3b01bb759a772"), "TEMPLATE_NAME" : "A" }
{ "_id" : ObjectId("5555d0c86ce3b01bb759a773"), "TEMPLATE_NAME" : "C" }
{ "_id" : ObjectId("5555d0f86ce3b01bb759a774"), "TEMPLATE_NAME" : "C" }
{ "_id" : ObjectId("5555d1026ce3b01bb759a775"), "TEMPLATE_NAME" : "A" }
{ "_id" : ObjectId("5555d1086ce3b01bb759a776"), "TEMPLATE_NAME" : "B" }
I want to retrieve only the unique template names and show them on HTML page.
Use the aggregation framework where your pipeline stages consist of the $group and $project operators respectively. The $group operator step groups the input documents by the given key and thus will return distinct documents in the result. The $project operator then reshapes each document in the stream, such as by adding new fields or removing existing fields:
db.products.aggregate([
{
"$group": {
"_id": "$TEMPLATE_NAME"
}
},
{
"$project": {
"_id": 0,
"TEMPLATE_NAME": "$_id"
}
}
])
Result:
/* 0 */
{
"result" : [
{
"TEMPLATE_NAME" : "C"
},
{
"TEMPLATE_NAME" : "A"
},
{
"TEMPLATE_NAME" : "B"
}
],
"ok" : 1
}
You could then use the meteorhacks:aggregate package to implement the aggregation in Meteor:
Add to your app with
meteor add meteorhacks:aggregate
Then simply use .aggregate function like below.
var products = new Mongo.Collection('products');
var pipeline = [
{
"$group": {
"_id": "$TEMPLATE_NAME"
}
},
{
"$project": {
"_id": 0,
"TEMPLATE_NAME": "$_id"
}
}
];
var result = products.aggregate(pipeline);
-- UPDATE --
An alternative that doesn't use aggregation is using underscore's methods to return distinct field values from the collection's find method as follows:
var distinctTemplateNames = _.uniq(Collection.find({}, {
sort: {"TEMPLATE_NAME": 1}, fields: {"TEMPLATE_NAME": true}
}).fetch().map(function(x) {
return x.TEMPLATE_NAME;
}), true)
;
This will return an array with distinct product template names ["A", "B", "C"]
You can check out some tutorials which explain the above approach in detail: Get unique values from a collection in Meteor and METEOR – DISTINCT MONGODB QUERY.
You can use distinct of mongodb like :
db.collectionName.distinct("TEMPLATE_NAME")
This query will return you array of distinct TEMPLATE_NAME

Compare array elements,remove the one with the lowest score

There are 200 documents in school db. I must remove each document which has "type":"homework" and the lowest score.
{
"_id" : 0,
"name" : "aimee Zank",
"scores" :
[
{
"type" : "exam",
"score" : 1.463179736705023
},
{
"type" : "quiz",
"score" : 11.78273309957772
},
{
"type" : "homework",
"score" : 6.676176060654615
},
{
"type" : "homework",
"score" : 35.8740349954354
}
]
}
For example,here
{
"type" : "homework",
"score" : 6.676176060654615
}
must be removed as score = 6.6 < 35.8
I sorted all the documents like this:
db.students.find({"scores.type":"homework"}).sort({"scores.score":1})
But I do not know how then to remove the doc having the lowest score and type:homework???
NOTE: how to solve it by not using aggregation method? E.g., by sorting and then updating.
This can be done in a couple of steps. The first step is to grab a list of the documents with the minimum score by using the aggregation framework with $match, $unwind and $group operators that streamlines your documents to find the minimum score for each document:
lowest_scores_docs = db.school.aggregate([
{ "$match": {"scores.type": "homework"} },
{ "$unwind": "$scores" }, { "$match": {"scores.type": "homework"} },
{ "$group": { "_id":"$_id", "lowest_score": {"$min": "$scores.score" } } } ] )
The second step is to loop through the dictionary above and use the $pull operator in the update query to remove the element from the array as follows:
# Remove the lowest homework score from every student document found above.
for result in lowest_scores_docs["result"]:
    db.school.update(
        {"_id": result["_id"]},
        # Match on the type as well as the score, so that an exam or quiz
        # entry that happens to have the same score is not pulled too.
        {"$pull": {"scores": {"type": "homework",
                              "score": result["lowest_score"]}}}
    )
import pymongo
import sys

# Connect to the db on the standard port.
connection = pymongo.MongoClient("mongodb://localhost")
db = connection.school   # attach to db
students = db.students   # specify the collection

try:
    for doc in students.find({}):
        # Collect only the homework scores of this student.
        hw_scores = [item["score"]
                     for item in doc.get("scores", [])
                     if item.get("type") == "homework"]

        # Students without any homework entry have nothing to remove.
        if not hw_scores:
            continue

        hw_min = min(hw_scores)

        # Pull the lowest homework entry; the type filter keeps exam/quiz
        # entries with an identical score untouched.
        students.update_one(
            {"_id": doc["_id"]},
            {"$pull": {"scores": {"type": "homework", "score": hw_min}}}
        )
except pymongo.errors.PyMongoError as exc:
    # Report database errors instead of silently swallowing everything.
    sys.stderr.write("Error trying to process collection: {0}\n".format(exc))

Mongo: count the number of word occurrences in a set of documents

I have a set of documents in Mongo. Say:
[
{ summary:"This is good" },
{ summary:"This is bad" },
{ summary:"Something that is neither good nor bad" }
]
I'd like to count the number of occurrences of each word (case insensitive), then sort in descending order. The result should be something like:
[
"is": 3,
"bad": 2,
"good": 2,
"this": 2,
"neither": 1,
"nor": 1,
"something": 1,
"that": 1
]
Any idea how to do this? Aggregation framework would be preferred, as I understand it to some degree already :)
MapReduce might be a good fit, as it can process the documents on the server without doing any manipulation on the client (there isn't a feature to split a string on the DB server; this is an open issue).
Start with the map function. In the example below (which likely needs to be more robust), each document is passed to the map function (as this). The code looks for the summary field and if it's there, lowercases it, splits on a space, and then emits a 1 for each word found.
// Map step: for the current document (`this`), emit (word, 1) for every
// non-empty, lower-cased, space-separated token of its `summary` field.
var map = function() {
    var text = this.summary;
    if (!text) {
        return; // no summary on this document, nothing to emit
    }

    // Tokens are visited last-to-first.
    // Punctuation is not stripped here.
    text.toLowerCase().split(" ").reverse().forEach(function(word) {
        if (word) { // skip empty tokens produced by consecutive spaces
            emit(word, 1);
        }
    });
};
Then, in the reduce function, it sums all of the results found by the map function and returns a discrete total for each word that was emitted above.
// Reduce step: add up all the partial counts emitted for one word (`key`)
// and return the total.
var reduce = function(key, values) {
    return values.reduce(function(total, partial) {
        return total + partial;
    }, 0);
};
Finally, execute the mapReduce:
> db.so.mapReduce(map, reduce, {out: "word_count"})
The results with your sample data:
> db.word_count.find().sort({value:-1})
{ "_id" : "is", "value" : 3 }
{ "_id" : "bad", "value" : 2 }
{ "_id" : "good", "value" : 2 }
{ "_id" : "this", "value" : 2 }
{ "_id" : "neither", "value" : 1 }
{ "_id" : "nor", "value" : 1 }
{ "_id" : "something", "value" : 1 }
{ "_id" : "that", "value" : 1 }
A basic MapReduce example
// Map step: emit (word, 1) for each lower-cased, space-separated word of the
// current document's `summary` field.
var m = function() {
    var summary = this.summary;

    // Documents without a string summary are skipped rather than throwing
    // on `.split` of undefined/null.
    if (typeof summary !== "string") {
        return;
    }

    var words = summary.toLowerCase().split(" ");
    for (var i = 0; i < words.length; i++) {
        // Consecutive spaces produce empty tokens; those are not words.
        if (words[i]) {
            emit(words[i], 1);
        }
    }
};
// Reduce step: return the sum of the partial counts in `v` for word `k`.
// The values must be summed (not merely counted with `v.length`) because
// MongoDB may call reduce again on its own earlier outputs, in which case
// each element of `v` can already be a count greater than 1.
var r = function(k, v) {
    var total = 0;
    for (var i = 0; i < v.length; i++) {
        total += v[i];
    }
    return total;
};
db.collection.mapReduce(
m, r, { out: { merge: "words_count" } }
)
This will insert word counts into a collection named words_count, which you can sort (and index).
Note that it doesn't apply stemming, strip punctuation, or handle stop words, etc.
Also note you can optimize the map function by accumulating repeating word(s) occurrences and emitting the count, not just 1
You can use the $split aggregation operator (available since MongoDB 3.4).
Try Below query
// Count word occurrences (case-insensitively) across all `summary` fields.
db.summary.aggregate([
    // Only string summaries can be split; skip missing/null/other types.
    { $match : { summary : { $type : "string" } } },
    // Lower-case before splitting so "This" and "this" are counted together.
    { $project : { summary : { $split: [{ $toLower: "$summary" }, " "] } } },
    { $unwind : "$summary" },
    // Consecutive spaces yield empty tokens; drop them.
    { $match : { summary : { $ne : "" } } },
    { $group : { _id: "$summary" , total : { "$sum" : 1 } } },
    { $sort : { total : -1 } }
]);
Old question but since 4.2 this can be done with $regexFindAll now.
// Count word occurrences (case-insensitively) using $regexFindAll (4.2+).
db.summaries.aggregate([
    {$project: {
        occurences: {
            $regexFindAll: {
                // Lower-case first so "This" and "this" land in one group.
                input: { $toLower: '$summary' },
                regex: /\b\w+\b/ // match words
            }
        }
    }},
    // One document per matched word.
    {$unwind: '$occurences'},
    {$group: {
        _id: '$occurences.match', // group by each word
        totalOccurences: {
            $sum: 1 // add up total occurences
        }
    }},
    // Most frequent words first.
    {$sort: {
        totalOccurences: -1
    }}
]);
This will output docs in the following format:
{
_id: "matchedwordstring",
totalOccurences: number
}