I have a mongodb replica set with a lot of databases, collections & indexes.
We did a lot of refactoring and optimization and, of course, I have a lot of "creative queries" from the consumers.
I would like to clean up the unused indexes, just to save some space.
How can I check whether an index is being used? I can afford to check index by index and drop the unused ones.
Running an "explain" on all the possible queries is not an option :)
EDIT: SOLUTION BASED ON THE ACCEPTED ANSWER
The script had a bug. I am not a JavaScript expert, but here is the corrected script. I hope it will be useful for someone:
DB.prototype.indexStats = function() {
    var queries = [];
    var collections = db.getCollectionNames();

    var findQuery = function(q) {
        for (entryIdx in queries) {
            if (q == queries[entryIdx].query) {
                return entryIdx;
            }
        }
        return -1;
    }

    for (cIdx in collections) {
        var cName = collections[cIdx];
        var nsName = db.getName() + "." + cName;
        if (cName.indexOf("system") == -1) {
            var i = 1;
            var count = db.system.profile.count({ns: nsName});
            print('scanning profile {ns:"' + nsName + '"} with ' + count + ' records... this could take a while...');
            db.system.profile.find({ns: nsName}).addOption(16).batchSize(10000).forEach(function(profileDoc) {
                if (profileDoc.query && !profileDoc.query["$explain"]) {
                    var qIdx = findQuery(profileDoc.query);
                    if (qIdx == -1 && profileDoc.query["query"]) {
                        var size = queries.push({query: profileDoc.query, count: 1, index: ""});
                        var explain = db[cName].find(queries[size-1].query).explain();
                        if (profileDoc.query && profileDoc.query["query"]) {
                            queries[size-1].sort = profileDoc.query["orderby"];
                            if (queries[size-1].sort) {
                                explain = db[cName].find(queries[size-1].query.query).sort(queries[size-1].sort).explain();
                            }
                        }
                        queries[size-1].cursor = explain.cursor;
                        queries[size-1].millis = explain.millis;
                        queries[size-1].nscanned = explain.nscanned;
                        queries[size-1].n = explain.n;
                        queries[size-1].scanAndOrder = explain.scanAndOrder ? true : false;
                        if (explain.cursor && explain.cursor != "BasicCursor") {
                            queries[size-1].index = explain.cursor.split(" ")[1];
                        } else {
                            print('warning, no index for query {ns:"' + nsName + '"}: ');
                            printjson(profileDoc.query);
                            print('... millis: ' + queries[size-1].millis);
                            print('... nscanned/n: ' + queries[size-1].nscanned + '/' + queries[size-1].n);
                            print('... scanAndOrder: ' + queries[size-1].scanAndOrder);
                        }
                    } else if (qIdx != -1) {
                        queries[qIdx].count++;
                    }
                }
            });
        }
    }

    for (cIdx in collections) {
        var cName = collections[cIdx];
        if (cName.indexOf("system") == -1) {
            print('checking for unused indexes in: ' + cName);
            for (iIdx in db[cName].getIndexes()) {
                var iName = db[cName].getIndexes()[iIdx].name;
                if (iName.indexOf("system") == -1) {
                    var stats = db[cName].stats();
                    var found = false;
                    for (qIdx in queries) {
                        if (queries[qIdx].index == iName) {
                            found = true;
                            break;
                        }
                    }
                    if (!found) {
                        print('this index is not being used: ');
                        printjson(iName);
                    }
                }
            }
        }
    }
}
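Once the script is loaded in the mongo shell (and the profiler has collected some data in system.profile), it can be run against the current database with:

db.indexStats()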
The simplest solution to this is to use MongoDB's built-in $indexStats aggregation stage, added in MongoDB 3.2.
Using the Mongo console:
db.collection.aggregate([ { $indexStats: { } } ])
Using PyMongo:
from pymongo import MongoClient
collection = MongoClient()[db_name][collection_name]
index_stats = collection.aggregate([{'$indexStats':{}}])
for index_info in index_stats:
    print(index_info)
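If the goal is specifically to spot unused indexes, one hedged approach is to filter on the accesses.ops counter that $indexStats reports (a sketch for the mongo shell; note the counter is per mongod member and resets when the process restarts):

// List indexes on this member whose usage counter is still zero.
db.collection.aggregate([
    { $indexStats: {} },
    { $match: { "accesses.ops": 0 } },
    { $project: { name: 1, accesses: 1 } }
])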
There is a pretty cool script out on Github that you should look at:
https://github.com/wfreeman/indexalizer
Basically, it involves turning on profiling for your database; the script then uses the data collected by the profiler to drive explain() calls. It tells you both which indexes are not being used and which queries are not using indexes. Pretty slick.
More about MongoDB database profiling:
http://docs.mongodb.org/manual/reference/database-profiler/
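For reference, profiling is enabled per database from the mongo shell before running such a script (level 2 profiles all operations and adds overhead, so use it for a limited window):

// 0 = off, 1 = slow operations only, 2 = all operations
db.setProfilingLevel(2)
// ...run the normal workload for a while, then inspect what was captured:
db.system.profile.find().limit(5).pretty()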
Related
I'm trying to find a certain document in my MongoDB and then update its int value using a find query. I'm using $in because I used an array to find each element inside it, but when I use an ObjectId it gives me this error:
bloodinventoryDocs is not iterable
Here is what I did
var mongoose = require('mongoose');
var id = mongoose.Types.ObjectId('5c014c999cc48c3b0057988b');
var newValue = 1;
var newBloodgroup = "A_positive";
var newGetbloodcomponent = "Whole Blood";
Bloodinventory.find({ blood_component: { $in: newGetbloodcomponent }, blood_group: { $in: newBloodgroup }, chapter: { $in: id } }, function(err, bloodinventoryDocs) {
    for (let bloodinventory of bloodinventoryDocs) {
        bloodinventory.num_stock = bloodinventory.num_stock + newValue;
        bloodinventory.save(function(err) {
            if (err) {
                console.log(err);
            } else {
                console.log('success');
            }
        });
    }
});
Just use chapter: { $in: [id] }
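For context, $in expects an array, so a hedged sketch of the corrected call could look like this (wrapping the other two single values in arrays as well is an assumption; they may already be cast for you):

Bloodinventory.find({
    blood_component: { $in: [newGetbloodcomponent] },  // assumption: wrap the single value in an array
    blood_group: { $in: [newBloodgroup] },             // assumption: same here
    chapter: { $in: [id] }                             // the fix suggested above
}, function(err, bloodinventoryDocs) {
    // with a valid query, bloodinventoryDocs is an array and the for...of loop works
});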
Can someone tell me how to search data in LiteDB using the following pseudocode?
Pseudo Code
col = db.GetCollection<Product>("products");
string keyword = "1AS";
Query query;
if (condition1)
{
    query += Query.Contains("ProductName", keyword);
}
if (condition2)
{
    query += Query.Contains("ProductModel", keyword);
}
if (condition3)
{
    query += Query.Contains("Note", keyword);
}
if (query.Any())
{
    var data = col.Find(query).toList();
}
Thanks in advance
You can use Query.And(params Query[] queries), like this:
var list = new List<Query>();
if (condition1)
{
    list.Add(Query.Contains("ProductName", keyword));
}
if (condition2)
{
    list.Add(Query.Contains("ProductModel", keyword));
}
...
if (list.Count > 0)
{
    var q = list.Count == 1 ? list.First() : Query.And(list.ToArray());
    var data = col.Find(q);
}
I have a collection of data in MongoDB, and I want to suggest the best matches while the user types a query into our suggestion box.
When the user starts typing com, the suggestions should be:
Computer
Computer Science
and other similar matches
I am sorting in Node by getting all the matched data from Mongo first and then giving each document a rank:
function rank(name, q) {
    var len = name.length,
        lastIndex = -1;
    for (var i = 0; i < q.length; i++) {
        var n = name.indexOf(q[i], (lastIndex + 1));
        if (n !== -1) {
            len--;
            lastIndex = n;
        }
    }
    return len;
}

var query = 'com';

// giving rank to data
data = data.map(function(v) {
    v.rank = rank(v.value, query);
    return v;
});

// sorting by rank
data = data.sort(function(a, b) {
    return a.rank - b.rank;
});
It gives me satisfactory results, but it will be too slow when dealing with a large data set.
I want to let the MongoDB engine deal with the sorting and give me just a limited set of best matches.
Maybe you could do it through map-reduce. Map-reduce is a data processing paradigm for condensing large volumes of data into useful aggregated results.
var mapFn = function() {
    var len = this.name.length,
        lastIndex = -1;
    var q = 'com';
    for (var i = 0; i < q.length; i++) {
        var n = this.name.indexOf(q[i], (lastIndex + 1));
        if (n !== -1) {
            len--;
            lastIndex = n;
        }
    }
    emit(len, this);
};

var reduceFn = function(key, values) {
    return values.sort(function(a, b) {
        return a.name - b.name;
    });
};

db.collection.mapReduce(mapFn, reduceFn, { out: { reduce: 'result_collection' } });
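If the output is written to result_collection as above, the rank is the emitted key, so a quick way to read back the best matches (a sketch, assuming the default _id/value output shape of mapReduce) is:

// Lowest rank first; each document's value field holds the reduced result for that rank.
db.result_collection.find().sort({ _id: 1 }).limit(10)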
I have MongoDB, in which there are 3 huge collections, say 'A', 'B' and 'C'.
Each collection contains about 2 million documents.
Each document has certain properties.
Each document needs to be updated based on the values of those properties, from which I can determine what the '$set' for that document should be.
Currently I am using the same approach for each collection:
find all the documents in batches, collect them in memory (which I think is the culprit in the current approach), then update them one by one.
For the first collection (which has data similar to the other collections), the task completes in 10 minutes; the next two collections then take approximately 2 hours each, or the MongoDB client crashes before finishing.
There is something wrong and undesirable in the current approach.
Model.collection.find({}).batchSize(BATCH).toArray(function(err, docs) {
    if (err || !docs || !docs.length)
        return afterCompleteOneCollection(err);

    var spec = function(index) {
        if (index % 1000 === 0) console.log('at index : ' + index);

        var toSet = { };
        var toUnset = { };

        var over = function() {
            var afterOver = function(err) {
                if (err) return afterCompleteOneCollection(err);
                if (index < docs.length - 1) spec(index + 1);
                else afterCompleteOneCollection(null);
            };

            var sb = Object.keys(toSet).length;
            var ub = Object.keys(toUnset).length;
            if (sb || ub) {
                var all = {};
                if (sb) all.$set = toSet;
                if (ub) all.$unset = toUnset;
                Model.collection.update({ _id : docs[index]._id }, all, {}, afterOver);
            } else afterOver(null);
        };

        forEachOfDocument(docs[index], toSet, toUnset, over);
    };

    spec(0);
});
Is there a better solution for this?
The streaming approach from http://mongodb.github.io/node-mongodb-native/api-generated/cursor.html#stream worked for me.
This is what I am doing:
var stream = Model.collection.find().stream();

stream.on('data', function(data) {
    if (data) {
        var toSet = { };
        var toUnset = { };

        var over = function() {
            var afterOver = function(err) {
                if (err) console.log(err);
            };

            var sb = Object.keys(toSet).length;
            var ub = Object.keys(toUnset).length;
            if (sb || ub) {
                var all = {};
                if (sb) all.$set = toSet;
                if (ub) all.$unset = toUnset;
                Model.collection.update({ _id : data._id }, all, {}, afterOver);
            } else afterOver(null);
        };

        forEachOfDocument(data, toSet, toUnset, over);
    }
});

stream.on('close', function() {
    afterCompleteOneCollection();
});
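One possible refinement, sketched here rather than tested against this schema, is to pause the stream while each update is in flight so documents are not buffered faster than they can be written:

var stream = Model.collection.find().stream();

stream.on('data', function(doc) {
    stream.pause();                                   // hold the stream while this document is updated
    Model.collection.update({ _id: doc._id }, { $set: { /* fields to set */ } }, {}, function(err) {
        if (err) console.log(err);
        stream.resume();                              // ask the cursor for the next document
    });
});

stream.on('close', function() {
    afterCompleteOneCollection();
});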