Meteor Publish Distinct Values of Field in Collection - mongodb

I'm stuck on a pretty simple scenario in Meteor:
I have a huge collection of things with many fields, some of them containing quite a bit of text.
I want to create a page for searching that collection.
One of the fields that each item in the collection has is "category".
I'd like to give the user the ability to filter by that category.
For that, I need to publish just the distinct values of the category field in the collection.
I can't figure out a way to do that without publishing the whole collection which takes way too long. How can I publish just the distinct categories and use them to fill a dropdown?
Bonus question and somewhat related: How do I publish a count of all items in the collection without publishing the whole collection?

A good starting point to make this easier would be to normalize your categories into a separate database collection.
However assuming that is not possible or practical, the best (though imperfect) solution will be to publish two separate versions of your collection, one which returns only the categories field of the entire collection and another which returns all fields of the collection for the selected category only. That would look like the following:
// SERVER
Meteor.startup(function () {
  // Lightweight feed: every thing, but only its _id and `category` field, so
  // the client can assemble the list of distinct categories without receiving
  // the large text fields.
  Meteor.publish('allThings', function () {
    // The projected field must be spelled exactly like the stored field
    // (`category`, as queried below); a misspelled name yields _id-only docs.
    return Things.find({}, {fields: {category: 1}});
  });

  // Full documents, restricted to the selected category.
  // Subscribe reactively on the client via something like a Session variable,
  // e.g., Meteor.subscribe("thingsByCategory", Session.get("category"));
  Meteor.publish('thingsByCategory', function (category) {
    // The argument comes from the client: accept plain strings only so a
    // crafted selector object (e.g. {$ne: null}) cannot widen the query.
    if (typeof category !== 'string') {
      return this.ready();
    }
    return Things.find({category: category});
  });
});
Note that you will still need to assemble your array of categories client side from the Things cursor (for example, by using underscore's _.pluck and _.uniq methods to grab the categories and remove any dups). But the data set will be much smaller as you are only working with single-field documents now.
(Note that ideally, you would want to use Mongo's distinct() method in your publish function to publish only the distinct categories, but that is not possible directly as it returns an array which cannot be published).

You could use the internal this._documents.collectionName to only send new categories down to the client. Tracking which categories to remove becomes a bit ugly so you probably will still end up maintaining a separate 'categories' collection eventually.
Example:
// Publishes the distinct `category` values of largeCollection as documents of
// a 'categories' collection (document _id === the category value).
Meteor.publish('categories', function () {
  var self = this;

  // Categories already sent on this subscription, read from the internal
  // `_documents` bookkeeping. NOTE(review): `_documents` is a private Meteor
  // structure; presumably it is keyed by collection name and has no
  // 'categories' entry before the first self.added() - hence the fallback.
  function publishedCategories() {
    return (self._documents && self._documents.categories) || {};
  }

  var handle = largeCollection.find({}, {fields: {category: 1}}).observeChanges({
    added: function (id, doc) {
      // Documents without a category have nothing to publish.
      if (doc.category == null)
        return;
      // Only the first document of a category produces an `added` message.
      if (!publishedCategories()[doc.category])
        self.added('categories', doc.category, {category: doc.category});
    },
    removed: function () {
      // This callback does not know which category the removed document had,
      // so every published category is re-checked against the collection.
      _.keys(publishedCategories()).forEach(function (category) {
        if (largeCollection.find({category: category}, {limit: 1}).count() === 0)
          self.removed('categories', category);
      });
    }
  });

  self.ready();

  // Stop observing once the client unsubscribes, otherwise the observer leaks.
  self.onStop(function () {
    handle.stop();
  });
});

Re: the bonus question, publishing counts: take a look at the meteorite package publish-counts. I think that does what you want.

These patterns might be helpful to you. Here is a publication that publishes counts:
/*****************************************************************************/
/* Counts Publish Function
/*****************************************************************************/
// server: publish, for every video of a project, the current number of
// observations as a document of the client-side "counts" collection
// ({video_id, observations}, keyed by video_id).
//
// params.projectId - project whose videos are counted; the subscriber's
//                    `roles` must contain it, otherwise nothing is published.
//
// The parameter was previously named `arguments`, which shadows the built-in
// arguments object and is a SyntaxError in strict-mode code.
Meteor.publish("countsByProject", function (params) {
  var self = this;
  var projectId = params && params.projectId;

  // Anonymous subscribers, unknown users and users without the project role
  // get an empty, ready subscription.
  var user = self.userId && Meteor.users.findOne({_id: self.userId});
  if (!user || !_.contains(user.roles, projectId)) {
    return self.ready();
  }

  var handles = [];
  Videos.find({'projectId': projectId}).forEach(function (video) {
    var count = 0;
    // observeChanges only returns after the initial `added` callbacks have
    // run. Until then we don't want to send a lot of `self.changed()`
    // messages - hence tracking the `initializing` state.
    var initializing = true;
    var video_id = video.video_id;

    handles.push(Observations.find({video_id: video_id}).observeChanges({
      added: function () {
        count++;
        if (!initializing)
          self.changed("counts", video_id, {'video_id': video_id, 'observations': count});
      },
      removed: function () {
        count--;
        self.changed("counts", video_id, {'video_id': video_id, 'observations': count});
      }
      // don't care about changed
    }));

    // Instead, send one `self.added()` message with the initial total right
    // after observeChanges has returned.
    initializing = false;
    self.added("counts", video_id, {'video_id': video_id, 'observations': count});
  });

  // Stop observing every cursor when the client unsubscribes.
  self.onStop(function () {
    _.each(handles, function (handle) {
      handle.stop();
    });
  });

  // Mark the subscription ready once, after all initial counts were sent.
  return self.ready();
});
And here is one that creates a new collection from a specific field:
/*****************************************************************************/
/* Tags Publish Functions
/*****************************************************************************/
// server: publish the distinct tags used by a project's observations as a
// single document of the client-side "tags" collection
// ({projectId, tags}, keyed by projectId).
//
// params.projectId - project whose tags are published; the subscriber's
//                    `roles` must contain it, otherwise nothing is published.
//
// The parameter was previously named `arguments`, which shadows the built-in
// arguments object and is a SyntaxError in strict-mode code.
Meteor.publish("tags", function (params) {
  var self = this;
  var projectId = params && params.projectId;

  var user = self.userId && Meteor.users.findOne({_id: self.userId});
  if (!user || !_.contains(user.roles, projectId)) {
    return self.ready();
  }

  var selector = {'projectId': projectId};
  var options = {fields: {tags: 1}};

  // Recompute the de-duplicated tag list from the observations as stored now.
  function distinctTags() {
    var observations = Observations.find(selector, options).fetch();
    return _.uniq(_.flatten(_.pluck(observations, 'tags')));
  }

  function sendTags() {
    self.changed("tags", projectId, {'projectId': projectId, 'tags': tags});
  }

  var initializing = true;
  var tags = distinctTags();

  var handle = Observations.find(selector, options).observeChanges({
    added: function (id, fields) {
      // The initial documents are already covered by distinctTags() above.
      if (!initializing) {
        tags = _.union(tags, fields.tags);
        sendTags();
      }
    },
    changed: function () {
      // Only `tags` is projected, so a change here means the tags were edited.
      tags = distinctTags();
      sendTags();
    },
    removed: function () {
      // A removed observation may have held the last use of a tag, so the
      // list has to be rebuilt rather than re-sent unchanged.
      tags = distinctTags();
      sendTags();
    }
  });

  initializing = false;
  self.added("tags", projectId, {'projectId': projectId, 'tags': tags});

  self.onStop(function () {
    handle.stop();
  });

  return self.ready();
});

I have not tested it on Meteor, and according to the replies, I'm getting skeptical that it will work but using a mongoDB distinct would do the trick.
http://docs.mongodb.org/manual/reference/method/db.collection.distinct/

Related

Publishing Counts for objects in Meteor

On my server side I have for various objects a publication which basically returns the count. Every different object has a different publication name like this:
Meteor.publish('object1Count', function(...
Meteor.publish('object2Count', function(...
Which are something like this:
// Publishes a live count of Object1 documents as document 1 of the
// client-side "totalcounts" collection.
Meteor.publish('object1Count', function (arg) {
  var subscription = this;
  var total = 0;
  var live = false; // becomes true once the initial `added` callbacks are done

  // With an argument: filter on it and apply no options.
  // Without one: match everything, with the `limit: 1` option.
  var query = {};
  var projection = {limit: 1};
  if (arg) {
    query = {arg: arg};
    projection = {};
  }

  function sendChange() {
    subscription.changed("totalcounts", 1, {count: total});
  }

  var observer = Object1.find(query, projection).observeChanges({
    added: function (idx) {
      total += 1;
      if (live) {
        sendChange();
      }
    },
    removed: function (idx) {
      total -= 1;
      sendChange();
    }
  });

  // The initial total goes out as a single `added` message.
  live = true;
  subscription.added("totalcounts", 1, {count: total});
  subscription.ready();

  subscription.onStop(function () {
    observer.stop();
  });
});
But as you see inside each of these methods there will be this line
self.added("totalcounts", 1, {count: count});
In fact on the client side when I need to access the count of an Object I do like this:
template.subscribe('object1Count', template.reactiveEventId.get());
...
TotalCounts = (typeof TotalCounts==='undefined')?new Mongo.Collection("totalcounts"):TotalCounts;
It apparently works, but now that I read it twice I wonder why, the "totalcounts" collection looks like the same for all the objects, so if I switch between pages needing different totalcounts (for different objects), I guess that the client destroys the local collection totalcounts and creates a new one. Does this happen also server side?
So finally my question is: what is the best practice? The projects need the total counts for various reasons: pagination, charts, etc.. I want to create the total counts server side and just pass the minimum data for that. Should I create different "totalcounts" for every object? What's the efficient way of doing this?
Thanks
self.added("totalcounts", 1, {count: count});
This call adds a document with _id 1 and the fields {count: count} to the collection named totalcounts.
Because every publication uses the same _id, you can't have more than one of these subscriptions active at the same time.
Btw, when the template is "unmounted" it will auto stop subscriptions.

How do I publish two random items from a Meteor collection?

I'm making an app where two random things from a collection are displayed to the user. Every time the user refreshes the page or clicks on a button, she would get another random pair of items.
For example, if the collection were of fruits, I'd want something like this:
apple vs banana
peach vs pineapple
banana vs peach
The code below is for the server side and it works except for the fact that the random pair is generated only once. The pair doesn't update until the server is restarted. I understand it is because generate_pair() is only called once. I have tried calling generate_pair() from one of the Meteor.publish functions but it only sometimes works. Other times, I get no items (errors) or only one item.
I don't mind publishing the entire collection and selecting random items from the client side. I just don't want to crash the browser if Items has 30,000 entries.
So to conclude, does anyone have any ideas of how to get two random items from a collection appearing on the client side?
// The pair that is currently published.
var first_item;
var second_item;

// This is the best way I could find to get a random item from a Meteor collection.
// Every item in Items has a 'random_number' field with a randomly generated
// number between 0 and 1; the cursor returned here selects at most one item
// whose random_number is greater than a fresh random threshold.
var random_item = function () {
  var selector = {random_number: {$gt: Math.random()}};
  var options = {limit: 1};
  return Items.find(selector, options);
};
// Stores two random item cursors in first_item / second_item, drawing the
// second one again for as long as it points at the same document as the first.
var generate_pair = function () {
  first_item = random_item();
  do {
    second_item = random_item();
  } while (first_item.fetch()[0]._id === second_item.fetch()[0]._id);
};
// Draw the pair once, when this file is loaded on the server.
generate_pair();

// Each publication hands out one cursor of the pair generated above.
Meteor.publish('first_item', function publishFirstItem() {
  return first_item;
});

// Is this good Meteor style to have two publications doing essentially the same thing?
Meteor.publish('second_item', function publishSecondItem() {
  return second_item;
});
The problem with your approach is that subscribing to the same publication with the same arguments (no arguments in this case) over and over on the client only subscribes you once to the server-side logic; this is because Meteor optimizes its internal Pub/Sub mechanism.
To truly discard the previous subscription and get the server-side publish code to re-execute and send two new random documents, you need to introduce a useless random argument to your publication, your client-side code will subscribe over and over to the publication with a random number and each time you'll get unsubscribed and resubscribed to new random documents.
Here is a full implementation of this pattern :
server/server.js
// Returns the _id of one randomly chosen document of Items, or the falsy
// result of findOne when no document was found.
function randomItemId() {
  // size of the collection
  var total = Items.find().count();
  // random offset N in [0, total - 1]
  var offset = Math.floor(Random.fraction() * total);
  // pick the document found after skipping N documents
  var picked = Items.findOne({}, {skip: offset});
  if (!picked) {
    return picked;
  }
  return picked._id;
}
// Returns an array with the ids of two distinct random items.
//
// When the collection holds fewer than two documents a distinct pair cannot
// exist; the array then contains the single available id, or is empty,
// instead of the re-draw loop spinning forever.
function generateItemIdPair() {
  var first = randomItemId();
  var second = randomItemId();

  // Re-draw the second id on a collision, but only while the collection is
  // large enough for a different id to exist.
  while (first === second && Items.find().count() >= 2) {
    second = randomItemId();
  }

  var pair = [];
  if (first != null) {
    pair.push(first);
  }
  if (second != null && second !== first) {
    pair.push(second);
  }
  return pair;
}
// Publishes the two items of a freshly generated random id pair.
// `random` is never read in the body.
Meteor.publish("randomItems", function (random) {
  var selector = {_id: {$in: generateItemIdPair()}};
  return Items.find(selector);
});
client/client.js
// every 5 seconds replace the current subscription with one for 2 new random items
var randomItemsHandle = null;
Meteor.setInterval(function () {
  // Outside of a reactive computation Meteor.subscribe() never stops earlier
  // subscriptions by itself, so the previous one is stopped explicitly;
  // otherwise every tick would add two more published items.
  if (randomItemsHandle) {
    randomItemsHandle.stop();
  }
  // The random argument makes each subscription distinct from the last one.
  randomItemsHandle = Meteor.subscribe("randomItems", Random.fraction(), function () {
    console.log("fetched these random items :", Items.find().fetch());
  });
}, 5000);
You'll need to meteor add random for this code to work.
# Publishes two randomly sampled documents of Docs.
Meteor.publish 'randomDocs', ->
  # Only the _id is needed to draw the sample, so project it instead of
  # loading every full document into memory.
  ids = _(Docs.find({}, {fields: {_id: 1}}).fetch()).pluck '_id'
  randomIds = _(ids).sample 2
  Docs.find _id: $in: randomIds
Here's another approach, uses the excellent publishComposite package to populate matches in a local (client-only) collection so it doesn't conflict with other uses of the main collection:
if (Meteor.isClient) {
  // Collection the sampled documents are published into.
  randomDocs = new Mongo.Collection('randomDocs');
}

if (Meteor.isServer) {
  // Publishes `select_count` randomly sampled documents of baseCollection
  // under the collection name "randomDocs".
  Meteor.publishComposite("randomDocs", function (select_count) {
    return {
      collectionName: "randomDocs",
      find: function () {
        const publication = this;
        const picked = _.sample(baseCollection.find({}).fetch(), select_count);
        picked.forEach((doc) => {
          publication.added("randomDocs", doc._id, doc);
        });
        publication.ready();
      }
    };
  });
}
in onCreated: this.subscribe("randomDocs",3);
(then in a helper): return randomDocs.find({},{limit:3});

Are DBRefs supported in Meteor yet? [duplicate]

I'm using meteor 0.3.7 in Win7(32) and trying to create a simple logging system using 2 MongoDB collections to store data that are linked by DBRef.
The current pseudo schema is :
Users {
username : String,
password : String,
created : Timestamp,
}
Logs {
user_id : DBRef {$id, $ref}
message : String
}
I use server methods to insert the logs so I can do some upserts on the clients collection.
Now I want to do an old "left join" and display a list of the last n logs with the embedded User name.
I don't want to embed the Logs in Users because the most used operation is getting the last n logs. Embedding in my opinion was going to have a big impact in performance.
What is the best approach to achieve this?
Next, it would be great if it were possible to edit the User name and have all items change their name.
Regards
Playing around with Cursor.observe answered my question. It may not be the most effective way of doing this, but it solves my future problems of dereferencing DBRef "links".
So for the server we need to publish a special collection. One that can enumerate the cursor and for each document search for the corresponding DBRef.
Bear in mind this implementation is hardcoded and should be done as a package like UnRefCollection.
Server Side
CC.Logs = new Meteor.Collection("logs");
CC.Users = new Meteor.Collection("users");

// Publishes one page of logs into the client-side 'logsAndUsers' collection,
// each log carrying the user it references as an embedded `user` field.
//
// page - 1-based page number (defaults to 1)
// size - number of logs per page (defaults to 100)
Meteor.publish('logsAndUsers', function (page, size) {
  var self = this;
  var startup = true;
  var startupList = [], uniqArr = [];
  page = page || 1;
  size = size || 100;
  var skip = (page - 1) * size;
  var cursor = CC.Logs.find({}, {limit : size, skip : skip});

  var handle = cursor.observe({
    added : function(doc, idx){
      var clone = _.clone(doc);
      // Logs without a reference are published without a `user`.
      var refId = clone.user_id && clone.user_id.oid; // should search DBRefs
      if (startup){
        // First run: only collect, the users are fetched in one query below.
        startupList.push(clone);
        if (!_.contains(uniqArr, refId))
          uniqArr.push(refId);
      } else {
        // Logs added by clients after startup are dereferenced one by one.
        // (`refId` was misspelled `refid` here, which threw a ReferenceError.)
        clone.user = CC.Users.findOne({_id : refId});
        self.set('logsAndUsers', clone._id, clone);
        self.flush();
      }
    },
    removed : function(doc, idx){
      self.unset('logsAndUsers', doc._id, _.keys(doc));
      self.flush();
    },
    changed : function(new_document, idx, old_document){
      // Send only the fields whose value differs from the old document ...
      var set = {};
      _.each(new_document, function (v, k) {
        if (!_.isEqual(v, old_document[k]))
          set[k] = v;
      });
      self.set('logsAndUsers', new_document._id, set);
      // ... and unset the fields that no longer exist.
      var dead_keys = _.difference(_.keys(old_document), _.keys(new_document));
      self.unset('logsAndUsers', new_document._id, dead_keys);
      self.flush();
    },
    moved : function(document, old_index, new_index){
      // Not used
    }
  });

  self.onStop(function(){
    handle.stop();
  });

  // Deref on first run: a single query for all referenced users.
  var derefs = CC.Users.find({_id : {$in : uniqArr} }).fetch();
  _.forEach(startupList, function (item){
    // _.find stops at the first match; returning false from an underscore
    // forEach callback would not end the loop.
    var user = _.find(derefs, function (candidate){
      return !!item.user_id && item.user_id.oid === candidate._id;
    });
    if (user)
      item.user = user;
    self.set('logsAndUsers', item._id, item);
  });

  startup = false;
  self.complete();
  self.flush();
});
For each added logs document it'll search the users collection and try to add to the logs collection the missing information.
The added function is called for each document in the logs collection. For the first run I created a startupList and an array of unique user ids, so that the first run queries the db only once. It's a good idea to add a paging mechanism to speed things up.
Client Side
On the client, subscribe to the logsAndUsers collection, if you want to make changes do it directly to the Logs collection.
// The collection name has to match the name the server publishes into
// ('logsAndUsers'), and the constructor is Meteor.Collection (capital C).
LogsAndUsers = new Meteor.Collection('logsAndUsers');
Logs = new Meteor.Collection('logs'); // Changes here are observed in the LogsAndUsers collection

// Subscribe to the page of logs stored in the Session variable 'page'
// (page 1 when it is unset).
Meteor.autosubscribe(function () {
  var page = Session.get('page') || 1;
  Meteor.subscribe('logsAndUsers', page);
});
Why not just also store the username in the logs collection as well?
Then you can query on them directly without needing any kind of "join"
If for some reason you need to be able to handle that username change, you just fetch the user object by name, then query on Logs with { user_id : user._id }

Meteor and DBRefs

I'm using meteor 0.3.7 in Win7(32) and trying to create a simple logging system using 2 MongoDB collections to store data that are linked by DBRef.
The current pseudo schema is :
Users {
username : String,
password : String,
created : Timestamp,
}
Logs {
user_id : DBRef {$id, $ref}
message : String
}
I use server methods to insert the logs so I can do some upserts on the clients collection.
Now I want to do an old "left join" and display a list of the last n logs with the embedded User name.
I don't want to embed the Logs in Users because the most used operation is getting the last n logs. Embedding in my opinion was going to have a big impact in performance.
What is the best approach to achieve this?
Next, it would be great if it were possible to edit the User name and have all items change their name.
Regards
Playing around with Cursor.observe answered my question. It may not be the most effective way of doing this, but it solves my future problems of dereferencing DBRef "links".
So for the server we need to publish a special collection. One that can enumerate the cursor and for each document search for the corresponding DBRef.
Bear in mind this implementation is hardcoded and should be done as a package like UnRefCollection.
Server Side
CC.Logs = new Meteor.Collection("logs");
CC.Users = new Meteor.Collection("users");

// Publishes one page of logs into the client-side 'logsAndUsers' collection,
// each log carrying the user it references as an embedded `user` field.
//
// page - 1-based page number (defaults to 1)
// size - number of logs per page (defaults to 100)
Meteor.publish('logsAndUsers', function (page, size) {
  var self = this;
  var startup = true;
  var startupList = [], uniqArr = [];
  page = page || 1;
  size = size || 100;
  var skip = (page - 1) * size;
  var cursor = CC.Logs.find({}, {limit : size, skip : skip});

  var handle = cursor.observe({
    added : function(doc, idx){
      var clone = _.clone(doc);
      // Logs without a reference are published without a `user`.
      var refId = clone.user_id && clone.user_id.oid; // should search DBRefs
      if (startup){
        // First run: only collect, the users are fetched in one query below.
        startupList.push(clone);
        if (!_.contains(uniqArr, refId))
          uniqArr.push(refId);
      } else {
        // Logs added by clients after startup are dereferenced one by one.
        // (`refId` was misspelled `refid` here, which threw a ReferenceError.)
        clone.user = CC.Users.findOne({_id : refId});
        self.set('logsAndUsers', clone._id, clone);
        self.flush();
      }
    },
    removed : function(doc, idx){
      self.unset('logsAndUsers', doc._id, _.keys(doc));
      self.flush();
    },
    changed : function(new_document, idx, old_document){
      // Send only the fields whose value differs from the old document ...
      var set = {};
      _.each(new_document, function (v, k) {
        if (!_.isEqual(v, old_document[k]))
          set[k] = v;
      });
      self.set('logsAndUsers', new_document._id, set);
      // ... and unset the fields that no longer exist.
      var dead_keys = _.difference(_.keys(old_document), _.keys(new_document));
      self.unset('logsAndUsers', new_document._id, dead_keys);
      self.flush();
    },
    moved : function(document, old_index, new_index){
      // Not used
    }
  });

  self.onStop(function(){
    handle.stop();
  });

  // Deref on first run: a single query for all referenced users.
  var derefs = CC.Users.find({_id : {$in : uniqArr} }).fetch();
  _.forEach(startupList, function (item){
    // _.find stops at the first match; returning false from an underscore
    // forEach callback would not end the loop.
    var user = _.find(derefs, function (candidate){
      return !!item.user_id && item.user_id.oid === candidate._id;
    });
    if (user)
      item.user = user;
    self.set('logsAndUsers', item._id, item);
  });

  startup = false;
  self.complete();
  self.flush();
});
For each added logs document it'll search the users collection and try to add to the logs collection the missing information.
The added function is called for each document in the logs collection. For the first run I created a startupList and an array of unique user ids, so that the first run queries the db only once. It's a good idea to add a paging mechanism to speed things up.
Client Side
On the client, subscribe to the logsAndUsers collection, if you want to make changes do it directly to the Logs collection.
// The collection name has to match the name the server publishes into
// ('logsAndUsers'), and the constructor is Meteor.Collection (capital C).
LogsAndUsers = new Meteor.Collection('logsAndUsers');
Logs = new Meteor.Collection('logs'); // Changes here are observed in the LogsAndUsers collection

// Subscribe to the page of logs stored in the Session variable 'page'
// (page 1 when it is unset).
Meteor.autosubscribe(function () {
  var page = Session.get('page') || 1;
  Meteor.subscribe('logsAndUsers', page);
});
Why not just also store the username in the logs collection as well?
Then you can query on them directly without needing any kind of "join"
If for some reason you need to be able to handle that username change, you just fetch the user object by name, then query on Logs with { user_id : user._id }

how to calculate count and unique count over two fields in mongo reduce function

I have a link tracking table that has (amongst other fields) track_redirect and track_userid. I would like to output both the total count for a given link, and also the unique count - counting duplicates by the user id. So we can differentiate if someone has clicked the same link 5 times.
I've tried emitting this.track_userid in both the key and values parts but can't get to grips with how to correctly access them in the reduce function.
So if I roll back to when it actually worked, I have the very simple code below - just like it would be in a 'my first mapreduce function' example
map
function() {
if(this.track_redirect) {
emit(this.track_redirect,1);
}
}
reduce
function(k, vals) {
var sum = 0;
for (var i in vals) {
sum += vals[i];
}
return sum;
}
I'd like to know the correct way to emit the additional userid information and access it in the mapreduce please. or am i thinking about it in the wrong way?
in case it's not clear, I don't want to calculate the total clicks a userid has made, but to count the unique clicks of each url + userid - not counting any duplicate clicks a userid made on each link
can someone point me in the right direction please? thanks!
You can actually pass arbitrary object on the second parameter of the emit call. That means you can take advantage of this and store the userid in it. For example, your map function can look like this:
// Map step: for every document with a redirect, emit the redirect as key and
// a value holding one click plus the clicking user's id as a property name.
var mapFunc = function () {
  if (!this.track_redirect) {
    return;
  }
  var clickedBy = {};
  clickedBy[this.track_userid] = 1;
  var value = {
    users_clicked: clickedBy,
    total_clicks: 1
  };
  emit(this.track_redirect, value);
};
And your reduce function might look like this:
// Reduce step: merges all values emitted for one redirect.
//
// key    - the redirect (unused here)
// values - objects of the form {users_clicked, total_clicks}; each is either
//          a map output or the result of an earlier reduce pass, and both
//          have the same shape
//
// Returns one object of that same shape: the click totals added up and the
// user-id properties merged, so each user appears once.
var reduceFunc = function(key, values) {
  var summary = {
    users_clicked: {},
    total_clicks: 0
  };
  values.forEach(function (doc) {
    summary.total_clicks += doc.total_clicks;
    // Merge the user-id properties by hand: Object.extend is not part of
    // standard JavaScript, so relying on it ties the function to
    // environments that happen to define it.
    for (var userId in doc.users_clicked) {
      if (Object.prototype.hasOwnProperty.call(doc.users_clicked, userId)) {
        summary.users_clicked[userId] = doc.users_clicked[userId];
      }
    }
  });
  return summary;
};
The users_clicked property of the summary object basically stores the id of every user as a property (since you can't have duplicate properties, you can guarantee that it will store unique users). Also note that you have to be careful of the fact that some of the values passed to the reduce function can be result of a previous reduce and the sample code above takes that into account. You can find more about the said behavior in the docs here.
In order to get the unique count, you can pass in the finalizer function that gets called when the reduce phase is completed:
// Finalize step: turns the reduced value of one redirect into the output
// record {redirect, total_clicks, unique_clicks}.
var finalFunc = function (key, value) {
  var totalClicks = value.total_clicks;

  // Unique clicks = number of own properties of users_clicked.
  // Key counting approach taken from:
  // http://stackoverflow.com/questions/18912/how-to-find-keys-of-a-hash
  var clickers = value.users_clicked;
  var uniqueClicks = 0;
  for (var userId in clickers) {
    if (clickers.hasOwnProperty(userId)) {
      uniqueClicks += 1;
    }
  }

  return {
    redirect: key,
    total_clicks: totalClicks,
    unique_clicks: uniqueClicks
  };
};
Finally, you can execute the map reduce job like this (modify the out attribute to fit your needs):
// Run the job on db.users with the map, reduce and finalize functions;
// `out: { inline: 1 }` is the output option to adjust to your needs.
db.users.mapReduce(mapFunc, reduceFunc, { finalize: finalFunc, out: { inline: 1 }});