Is there a way to optimize count with MongoDB?

I have an index on id_profile and I run db.myCollection.count({"id_profile":xxx}). It's quite fast when the count is low, but when the count is large it starts getting slow. For example, if there are 1,000,000 records matching {"id_profile":xxx}, it can take up to 500 ms to return the count. I think that internally the engine is simply loading all the documents matching {"id_profile":xxx} in order to count them.
Is there a way to quickly retrieve a count where the filter matches an index exactly? I would like to avoid using a counter collection :(
NOTE: I'm on MongoDB 3.6.3 and this is the script I used:
db.createCollection("following");
db.following.createIndex( {"id_profile": 1}, {unique: false} );
function randInt(n) { return parseInt(Math.random()*n); }
for(var j=0; j<10; j++) {
print("Building op "+j);
var bulkop=db.following.initializeOrderedBulkOp() ;
for (var i = 0; i < 1000000; ++i) {
bulkop.insert(
{
id_profile: NumberLong("-4578128619402503089"),
id_following: NumberLong(randInt(9223372036854775807))
}
)
};
print("Executing op "+j);
bulkop.execute();
}
db.following.count({"id_profile":NumberLong("-4578128619402503089")});
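One thing worth checking (a diagnostic sketch, not part of the original post) is whether the count is answered from the index alone. In the mongo shell, explaining the count shows the winning plan; a count served purely by the index appears as a COUNT_SCAN stage, whereas a plan that has to load documents shows a FETCH stage:
// Explain the count; the filter value is taken from the script above.
db.following.explain("executionStats").count({"id_profile": NumberLong("-4578128619402503089")});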

Related

merge sort performance compared to insertion sort

For any array of length greater than 10, is it safe to say that merge sort performs fewer comparisons among the array's elements than does insertion sort on the same array, because the best case for the run time of merge sort is O(N log N) while for insertion sort it's O(N)?
My take on this. First off, you are talking about comparisons, but there are swaps as well that matter.
In insertion sort, in the worst case (an array sorted in the opposite direction) you have to do n^2 - n comparisons and swaps (11^2 - 11 = 121 - 11 = 110 for 11 elements, for example). But if the array is even partially sorted in the needed order (that is, many elements already sit at or near their correct positions), the number of swaps and comparisons can drop significantly: the right position for each element is found quickly, so far fewer actions are needed than for an array sorted in the opposite order. As you can see for arr2 below, which is almost sorted, the number of actions becomes linear in the input size: just 6.
var arr1 = [11,10,9,8,7,6,5,4,3,2,1];
var arr2 = [1,2,3,4,5,6,7,8,11,10,9];
function InsertionSort(arr) {
    var compNum = 0, swapNum = 0;
    for (var i = 1; i < arr.length; i++) {
        var temp = arr[i], j = i - 1;
        while (j >= 0) {
            if (temp < arr[j]) { arr[j + 1] = arr[j]; swapNum++; } else break;
            j--;
            compNum++;
        }
        arr[j + 1] = temp;
    }
    console.log(arr, "Number of comparisons: " + compNum, "Number of swaps: " + swapNum);
}
InsertionSort(arr1); // worst case, 11^2 - 11 = 110 actions
InsertionSort(arr2); // almost sorted array, few actions
In merge sort we always do approximately n*log n actions; the properties of the input array don't matter. So, as you can see, in both cases we get our arrays sorted in 39 actions:
var arr1 = [11,10,9,8,7,6,5,4,3,2,1];
var arr2 = [1,2,3,4,5,6,7,8,11,10,9];
var actions = 0;
function mergesort(arr, left, right) {
    if (left >= right) return;
    var middle = Math.floor((left + right) / 2);
    mergesort(arr, left, middle);
    mergesort(arr, middle + 1, right);
    merge(arr, left, middle, right);
}
function merge(arr, left, middle, right) {
    var l = middle - left + 1, r = right - middle, temp_l = [], temp_r = [];
    for (var i = 0; i < l; i++) temp_l[i] = arr[left + i];
    for (var i = 0; i < r; i++) temp_r[i] = arr[middle + i + 1];
    var i = 0, j = 0, k = left;
    while (i < l && j < r) {
        if (temp_l[i] <= temp_r[j]) {
            arr[k] = temp_l[i]; i++;
        } else {
            arr[k] = temp_r[j]; j++;
        }
        k++; actions++;
    }
    while (i < l) { arr[k] = temp_l[i]; i++; k++; actions++; }
    while (j < r) { arr[k] = temp_r[j]; j++; k++; actions++; }
}
mergesort(arr1, 0, arr1.length - 1);
console.log(arr1, "Number of actions: " + actions); // 11*log 11 = 39 (approx.)
actions = 0;
mergesort(arr2, 0, arr2.length - 1);
console.log(arr2, "Number of actions: " + actions); // 11*log 11 = 39 (approx.)
So, answering your question:
For any array of length greater than 10, is it safe to say that merge sort performs fewer comparisons among the array's elements than does insertion sort on the same array
I would say no, it isn't safe to say so. Merge sort can perform more actions than insertion sort in some cases. The size of the array isn't what matters here; what matters when comparing insertion sort with merge sort is how far your array is from being sorted. I hope it helps :)
BTW, merge sort and insertion sort have been combined into a hybrid stable sorting algorithm called Timsort, to get the best of both. Check it out if interested.

Peculiar issue with quicksort partition

Today, when trying quicksort, instead of taking the last element as the pivot and partitioning, I took the first element as the pivot. But it is not producing the correct partitioned output.
int pivot = ar[0];
int pindex = 0;
for (int i = 0; i < ar.size(); i++)
{
    if (ar[i] <= pivot)
    {
        swap(ar[i], ar[pindex]);
        pindex++;
    }
}
swap(ar[pindex], ar[ar.size() - 1]);
I could not understand why; I always use this for partitioning, but it does not work when I take the first element as the pivot.
But the following worked even though I took the first element as the pivot:
int i, j, pivot, temp;
pivot = ar[0];
i = 0;
j = ar.size() - 1;
while (1)
{
    while (ar[i] < pivot && ar[i] != pivot)
        i++;
    while (ar[j] > pivot && ar[j] != pivot)
        j--;
    if (i < j)
    {
        temp = ar[i];
        ar[i] = ar[j];
        ar[j] = temp;
    }
    else
    {
        break;
    }
}
What are the differences between them?
At last I found that this method is Hoare's partition scheme, whereas the typical quicksort method we all follow uses Lomuto's partition.
See this wiki page, it has all the details: https://en.wikipedia.org/wiki/Quicksort

How to paginate and group in MongoDB?

My objects are of the following structure:
{id: 1234, ownerId: 1, typeId: 3456, date:...}
{id: 1235, ownerId: 1, typeId: 3456, date:...}
{id: 1236, ownerId: 1, typeId: 12, date:...}
I would like to query the database so that it returns all the items that belong to a given ownerId, but only the first item of a given typeId, i.e. the typeId field is unique in the results. I would also like to be able to use skip and limit.
In SQL the query would be something like:
SELECT * FROM table WHERE ownerId=1 GROUP BY typeId ORDER BY date LIMIT 10 OFFSET 300
I currently have the following query (using pymongo) but it is giving me errors for using $sort, $limit and $skip:
search_dict['ownerId'] = 1
search_dict['$sort'] = {'date': -1}
search_dict['$limit'] = 10
search_dict['$skip'] = 200
collectionName.group(['typeId'], search_dict, {'list': []}, 'function(obj, prev) {prev.list.push(obj)}')
I have also tried the aggregation route, but as I understand it, grouping will touch all the items in the collection, group them, and then limit and skip. That would be too computationally expensive and slow. I need an iterative grouping algorithm.
search_dict = {'ownerId': 1}
collectionName.aggregate([
    {'$match': search_dict},
    {'$sort': {'date': -1}},
    {'$group': {'_id': "$typeId"}},
    {'$skip': skip},
    {'$limit': 10}
])
Your aggregation looks correct. You need to include the fields you want in the output in the $group stage using $first.
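For example, a sketch of the completed pipeline in the mongo shell (which fields to carry through with $first is an assumption; the skip and limit values are taken from the question):
// Keep one document per typeId, carrying the values of the newest document via $first.
db.collectionName.aggregate([
    { $match: { ownerId: 1 } },
    { $sort: { date: -1 } },
    { $group: {
        _id: "$typeId",
        id: { $first: "$id" },
        ownerId: { $first: "$ownerId" },
        date: { $first: "$date" }
    } },
    { $sort: { date: -1 } },  // $group does not preserve order, so re-sort before paging
    { $skip: 200 },
    { $limit: 10 }
])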
grouping will touch all the items in the collection, group them, and then limit and skip. This will be too computationally expensive and slow.
It won't touch all items in the collection. If the match + sort is indexed ({ "ownerId" : 1, "date" : -1 }), the index will be used for the match + sort, and the group will only process the documents that are the result of the match.
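For reference, creating that index in the mongo shell (collection name taken from the question's snippet) would look like this:
db.collectionName.createIndex({ "ownerId": 1, "date": -1 })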
The constraint is hardly ever CPU, except in cases of an unindexed sort; it's usually disk I/O.
I need an iterative grouping algorithm.
What precisely do you mean by "iterative grouping"? The grouping is iterative, as it iterates over the result of the previous stage and checks which group each document belongs to!
I am not too sure how you got the idea that this operation should be computationally expensive. That isn't really true for most SQL databases, and it surely isn't true for MongoDB. All you need to do is create an index over your sort criterion.
Here is how to prove it:
Open up a mongo shell and execute this:
var bulk = db.speed.initializeOrderedBulkOp();
for (var i = 1; i <= 100000; i++) {
    bulk.insert({field1: i, field2: i*i, date: new ISODate()});
    if ((i % 100) == 0) { print(i) }
}
bulk.execute();
The bulk execution may take some seconds. Next, we create a helper function:
Array.prototype.avg = function() {
    var av = 0;
    var cnt = 0;
    var len = this.length;
    for (var i = 0; i < len; i++) {
        var e = +this[i];
        if (!e && this[i] !== 0 && this[i] !== '0') e--;
        if (this[i] == e) { av += e; cnt++; }
    }
    return av / cnt;
}
The troupe is ready, the stage is set:
var times = new Array();
for (var i = 0; i < 10000; i++) {
    var start = new Date();
    db.speed.find().sort({date: -1}).skip(Math.random()*100000).limit(10);
    times.push(new Date() - start);
}
print(times.avg() + " msecs");
The output is in msecs. This is the output of 5 runs for comparison:
0.1697 msecs
0.1441 msecs
0.1397 msecs
0.1682 msecs
0.1843 msecs
The test server runs inside a docker image, which in turn runs inside a VM (boot2docker) on my 2.13 GHz Intel Core 2 Duo with 4 GB of RAM, running OS X 10.10.2, with a lot of Safari windows, iTunes, Mail, Spotify and Eclipse open as well. Not quite a production system. And that collection does not even have an index on the date field. With the index, the averages of 5 runs look like this:
0.1399 msecs
0.1431 msecs
0.1339 msecs
0.1441 msecs
0.1767 msecs
qed, hth.

In mongoDB can I efficiently group documents into groups of a set size?

I need to group data into subgroups of a set size. For example, say there are 6 records, ordered by date:
[1,2,3,4,5,6]
and I have a subgroup size of 2. I would end up with an array (length 3) of arrays (each of length 2):
[[1,2],[3,4],[5,6]]
Nothing about the records factors into the grouping, just their overall order and the subgroup size.
Does the aggregation framework have something that would help with this?
The best way to currently do this is with mapReduce:
db.collection.mapReduce(
    function() {
        var result = [];
        var x = 0;
        // take chunks of 2 while at least 2 elements remain
        for ( x = 0; x + 2 <= this.array.length; x += 2 ) {
            result.push( this.array.slice( x, x + 2 ) );
        }
        // if the length is odd, push the single remaining element as a final, shorter chunk
        if ( x < this.array.length )
            result.push( this.array.slice( x ) );
        emit( this._id, result );
    },
    function(){},
    {
        "out": { "inline": 1 }
    }
);
Or basically something along those lines.
The aggregation framework does not do slice type operations well, but JavaScript processes do, especially in this case.
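As a usage sketch (the array field name comes from the map function above, the sample values from the question), a document like this would produce the requested chunking in the inline result:
// Sample input; the mapReduce above emits [ [1,2], [3,4], [5,6] ] for this _id.
db.collection.insert({ _id: 1, array: [1, 2, 3, 4, 5, 6] })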

Translate SQL query to MongoDB

Hi, I'm a newbie to MongoDB and I need to translate this SQL query to MongoDB in two ways: first with the MapReduce method and then with the aggregation method. Can someone help?
select
    sum(l_extendedprice * l_discount) as revenue
from
    lineitem
where
    l_shipdate >= date '1994-01-01'
    and l_shipdate < date '1994-01-01' + interval '1' year
    and l_discount between 0.06 - 0.01 and 0.06 + 0.01
    and l_quantity < 24;
http://www.mongodb.org/display/DOCS/MapReduce
For your sample, using map/reduce
var m = function () { emit(1, this.l_extendedprice * this.l_discount); };
var r = function (k, vals) {
    var sum = 0;
    for (var i = 0; i < vals.length; i++) {
        sum += vals[i];
    }
    return sum;
}
var res = db.stuff.mapReduce(m, r, {
    out: "stuff_aggr",
    query: {
        // both bounds go in one object; duplicate keys would overwrite each other
        "l_shipdate": { $gte: ISODate("1994-01-01T00:00:00.000Z"), $lt: ISODate("1995-01-01T00:00:00.000Z") },
        "l_discount": { $gte: 0.05, $lte: 0.07 },
        "l_quantity": { $lt: 24 }
    }
});
Aggregation is still a beta feature; MapReduce is still the better option. I'm assuming you wanted to see if a complex where clause can be handled easily... It's not that different from SQL as long as you are restricting yourself to one collection/table.
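For the aggregation side of the question (the answer above only covers MapReduce), here is a sketch of the same query with the aggregation framework, assuming the collection is named lineitem as in the SQL:
// $match mirrors the WHERE clause; $group sums l_extendedprice * l_discount as revenue.
db.lineitem.aggregate([
    { $match: {
        l_shipdate: { $gte: ISODate("1994-01-01T00:00:00Z"), $lt: ISODate("1995-01-01T00:00:00Z") },
        l_discount: { $gte: 0.05, $lte: 0.07 },
        l_quantity: { $lt: 24 }
    } },
    { $group: {
        _id: null,
        revenue: { $sum: { $multiply: ["$l_extendedprice", "$l_discount"] } }
    } }
])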