Search Map / Reduce - mongodb

got a job comparison sql and nosql data for my report for the college course. I'm doing a inner join testing, and mongo did something equivalent to the map reduce but am having problems.
Find all records are correct, fetch rows from a specific id is also correct, but I can not search for text, or other attribute.
My collections:
y_um_milhao{id_y_um_milhao, col_descricao}
x_um_milhao{id_x_um_milhao, col_decimal, fk_y_um_milhao}
My map/reduce:
var mapX_um_milhao = function(){
var output = {id_x: this.x_um_milhao,olDecimal:this.col_decimal, id_x:this.id_x_um_milhao}
emit (this.id_x_um_milhao, output);
};
var mapY_um_milhao = function(){
var output = {y_id: this.y_um_milhao, colDescricao:this.col_descricao, id_y:this.id_y_um_milhao}
emit(this.id_y_um_milhao, output);
};
var reduce_um_milhao = function(key, values){
var outs = {colDescricao:null, id_y:null, colDecimal:null, id_x:null};
values.forEach(function(v){
if(outs.colDescricao == null){
outs.colDescricao = v.colDescricao
}
if(outs.id_y == null){
outs.id_y = v.id_y
}
if(outs.colDecimal == null){
outs.colDecimal = v.colDecimal
}
if(outs.id_x == null){
outs.id_x = v.id_x
}
});
return outs;
};
result = db.x_um_milhao.mapReduce(mapX_um_milhao, reduce_um_milhao, {out: {reduce: 'x_y'}});
result = db.y_um_milhao.mapReduce(mapY_um_milhao, reduce_um_milhao, {out: {reduce: 'x_y'}});
Seeking all records:
db.x_y.find()
Seeking registration by id:
db.x_y.find({_id:1)
Result:
{ "_id" : 1, "value" : { "colDescricao" : "Teste TCC1", "id_y" : 1, "colDecimal" : 13.38, "id_x" : 1 } }
Now I can not search for "colDescricao" how could he do?
PS: Sorry for the English, not speak and used the google translator

You can search for "colDescricao" like this:
db.x_y.find({value.colDescricao : "Teste TCC1"})

Related

Selective specific columns in SailsJS via API

I want to select certain columns be returned back from sailsjs / waterline / API
Given the search parameters:
var searchParams = {
"select":["ClientTypeID"]
, "where": {
"or" :
[{"ClientType": {"contains": "MAINT"}}]
}};
I make API call :
/api/reference/client_type?select=ClientTypeID&where=%7B%22or%22:%5B%7B%22ClientType%22:%7B%22contains%22:%22MAINT%22%7D%7D%5D%7D
based on
Select specific fields from database
I believe I am making the correct query, however, the JSON object that is returned has the all the columns of entity vs the 1 I wish to request ClientTypeID?
This is known issue but I've found workaround. You can override default find blueprint with your own. You need to create api/blueprints/find.js file with content:
var _ = require('lodash');
var Promise = require('bluebird');
var actionUtil = require('sails/lib/hooks/blueprints/actionUtil');
var takeAliases = _.partial(_.pluck, _, 'alias');
var populateAliases = function (model, alias) {
return model.populate(alias);
};
module.exports = function (req, res) {
_.set(req.options, 'criteria.blacklist', ['limit', 'skip', 'sort', 'populate', 'fields']);
var fields = req.param('fields') ? req.param('fields').replace(/ /g, '').split(',') : [];
var populate = req.param('populate') ? req.param('populate').replace(/ /g, '').split(',') : [];
var Model = actionUtil.parseModel(req);
var where = actionUtil.parseCriteria(req);
var limit = actionUtil.parseLimit(req);
var skip = actionUtil.parseSkip(req);
var sort = actionUtil.parseSort(req);
var findQuery = _.reduce(_.intersection(populate, takeAliases(Model.associations)), populateAliases, Model.find().where(where).limit(limit).skip(skip).sort(sort));
var countQuery = Model.count(where);
Promise.all([findQuery, countQuery])
.spread(function (_records, _count) {
var records = fields.length > 0 ? _.map(_records, _.partial(_.pick, _, fields)) : _records;
return [records, null, null, {
criteria: where,
limit: limit,
start: skip,
end: skip + limit,
total: _count
}];
})
.spread(res.ok)
.catch(res.serverError);
};
This is general blueprint I'm using in all of my projects. For all fixed blueprints you can take a look here - https://github.com/ghaiklor/generator-sails-rest-api/tree/dev/generators/app/templates/api/blueprints

In Meteor, simple Collection.find().fetch() is returning an empty array

here is my code. Could anyone please help me out in knowing why the value of rekt on the runtime is an empty array? On my meteor all find().fetch() are returning an empty array only. Does anyone have any experience with this sort of a thing.
PlayersList = new Mongo.Collection('players');
rekt = PlayersList.find().fetch();
console.log(rekt);
ids = new Object();
sorted = new Object();
oldq = new Object;
try
{
rs1 = {name : "Sam"};
PlayersList.update(rs1 , {$set: {name : "Sam" , score : 100}} , {upsert:true});
console.log(resultset);
}catch(e)
{}
function selectscore(cutoff)
{
var oldq = PlayersList.find().fetch();
for(var a in oldq)
{
elm = oldq[a];
if(elm.score<=cutoff)
{
sorted[elm.name] = elm.score;
}
}
return sorted;
}
if (Meteor.isClient) {
oldq = PlayersList.find().fetch();
for(var a in oldq)
{
elm = oldq[a];
ids[elm.name] = elm._id;
}
selectscore(50);
// counter starts at 0
Session.setDefault('counter', 0);
Template.hello.helpers({
counter: function () {
return Session.get('counter');
}
});
Template.hello.events({
'click button': function () {
// increment the counter when button is clicked
Session.set('counter', Session.get('counter') + 1);
}
});
}
if (Meteor.isServer) {
Meteor.startup(function () {
// code to run on server at startup
PlayersList.update({name:"Bob"} , {$set:{name: "Mob", score: 500}});
PlayersList.update({name:"Mob"} , {$set:{name: "Cobb", score: 5000}});
resultset = PlayersList.find().fetch();
//console.log(resultset.attr(_id));
rs = {name : "Slobb"};
PlayersList.update({name:"Cobb"} , {$set:{name: "Slobb", score: 50000}});
PlayersList.update( rs, {$set:{name: "Bob", score: 50}});
newq = PlayersList.find().fetch();
});
}
Thanks in advance.
You are logging the same result twice. The array returned by fetch isn't reactive. Meaning, when you fetch and then insert, the array wont change. So try replacing the second console.log(resultset); with PlayersList.find().fetch();. Also, set up the proper publish/subscribe functions, (as #Paul described).
it appears that there is some issue here not allowing the find().fetch() functions to return the object array. I tried kriegs solution but it did not work as well. I finally managed to solve the problem using Deps.autorun.
Thanks all

MongoDB getting size of cursor takes a long time if size is 0

I'm currently working on a project where I'm using keyword queries against a MongoDB. If I search for things that exists in the database everything works ok, but if I search for things that don't exist, or I have a typo in my query the appilcation basically crashes.
The query is as simple as this:
var query = Query.And(Query.Matches("text", searchText)
Where searchText is what's being written into the searchbox in the UI.
To check the size of the cursor I've tried implementing this:
if ( cursor.Size() == 0)
{
MessageBox.Show("Your search did not return a match. Please search for
something else.");
return database;
}
But the system takes 10-15 minutes to evaluate that the size is 0, compared to the 0.5 seconds if the size is 1 or more.
So do anyone have any suggestions? Either a better way of checking the size of the cursor or some kind of function that makes the method time out and tell the user that no match was found?
Thanks in advance.
Update:
As requested added the explain for something that should and something that shouldn't exist
db.docs.find( {text: "a"}).explain
function (verbose) {
/* verbose=true --> include allPlans, oldPlan fields */
var n = this.clone();
n._ensureSpecial();
n._query.$explain = true;
n._limit = Math.abs(n._limit) * -1;
var e = n.next();
function cleanup(obj){
if (typeof(obj) != 'object'){
return;
}
delete obj.allPlans;
delete obj.oldPlan;
if (typeof(obj.length) == 'number'){
for (var i=0; i < obj.length; i++){
cleanup(obj[i]);
}
}
if (obj.shards){
for (var key in obj.shards){
cleanup(obj.shards[key]);
}
}
if (obj.clauses){
cleanup(obj.clauses);
}
}
if (!verbose)
cleanup(e);
return e;
}
db.docs.find( {text: "fgrgfk"}).explain
function (verbose) {
/* verbose=true --> include allPlans, oldPlan fields */
var n = this.clone();
n._ensureSpecial();
n._query.$explain = true;
n._limit = Math.abs(n._limit) * -1;
var e = n.next();
function cleanup(obj){
if (typeof(obj) != 'object'){
return;
}
delete obj.allPlans;
delete obj.oldPlan;
if (typeof(obj.length) == 'number'){
for (var i=0; i < obj.length; i++){
cleanup(obj[i]);
}
}
if (obj.shards){
for (var key in obj.shards){
cleanup(obj.shards[key]);
}
}
if (obj.clauses){
cleanup(obj.clauses);
}
}
if (!verbose)
cleanup(e);
return e;
}
Update 2: Overview of indexes:
db.docs.getIndexes()
{
"v" : 1,
"key" : {
"_id" : 1
},
"ns" : "tweet_database.docs",
"name" : "_id_"
}

mongoDb global (scope) variable usage on multi-shards installation

I'm designing system that should be able to process millions of documents and report on them in different ways.
mongoDb map\reduce task is what I'm trying to implement (currently doing some investigation on that).
The very basic document structure is
db.test.insert(
{
"_id" : ObjectId("4f6063601caf46303c36eb27"),
"verbId" : NumberLong(1506281),
"sentences" : [
{
"sId" : NumberLong(2446630),
"sentiment" : 2,
"categories" : [
NumberLong(3257),
NumberLong(3221),
NumberLong(3291)
]
},
{
"sId" : NumberLong(2446631),
"sentiment" : 0,
"categories" : [
NumberLong(2785),
NumberLong(2762),
NumberLong(2928),
NumberLong(2952)
]
},
{
"sId" : NumberLong(2446632),
"sentiment" : 0,
"categories" : [
NumberLong(-2393)
]
},
{
"sId" : NumberLong(2446633),
"sentiment" : 0,
"categories" : [
NumberLong(-2393)
]
}
]
})
So that each document contains sentences, that could belong to different categories.
The report I'm trying to get is number of sentences in category (with percent of verbatims).
I'm doing next map-reduce jobs with finalize method to count different averages.
var map = function() {
var docCategories = new Array();
var catValues = new Array();
for (var i = 0; i < this.sentences.length; i++) { //iterate over sentences.
sentence = this.sentences[i];
for (var j = 0; j < sentence.categories.length; j++) {//iterate over categories
catId= sentence.categories[j].toNumber();
if (docCategories.indexOf(catId) < 0) {
docCategories.push(catId);
catValues.push({sentiment : sentence.sentiment, sentenceCnt: 1});
} else {
categoryIdx = docCategories.indexOf(catId);
catValue = catValues[categoryIdx];
catValue.sentiment = catValue.sentiment + sentence.sentiment;
catValue.sentenceCnt = catValue.sentenceCnt + 1;
}
}
}
totalCount++; //here we do try to count distinctCases see scope.
for (var i = 0; i < docCategories.length; i ++) {
emit(docCategories[i], {count: 1, sentenceCnt: catValues[i].sentenceCnt, sentiment: catValues[i].sentiment, totalCnt : totalCount});
}
};
var reduce = function(key, values) {
var res = {count : 0, sentenceCnt : 0, sentiment : 0};
for ( var i = 0; i < values.length; i ++ ) {
res.count += values[i].count;
res.sentenceCnt += values[i].sentenceCnt;
res.sentiment += values[i].sentiment;
}
return res;
};
var finalize = function(category, values) {
values.sentimentAvg = values.sentiment / values.sentenceCnt;
values.percentOfVerbatim = values.count / totalCount //scope variable (global)
return values;
};
var res = db.runCommand( { mapreduce:'test',
map:map,
reduce:reduce,
out: 'cat_volume',
finalize:finalize,
scope:{totalCount : 0},
});
The most interesting part here is that I'm using totalCount - to count number of verbatims I'm emitting. totalCount is the scope (global) variable.
Everything went well on One mongoDb installation, but when going to a shard instances I'm getting "Infinity" for percentOfVerbatim.
Actually in that case totalCount would be just db.test.count() (number of documents) but in future I'm going to add different conditions for documents to be count.
Doing any other query is very undesirable since db is very heavy.
Are there any other approaches to using global (scope) variables on multi-instance mongodb installation? Or should I use something else?
The scope variables are not shared among the shards. You can treat it as a global constant. Updates to the value won't be visible to map or reduce functions running on different shards.
Finally I've found the way how to count number of documents I'm emitting.
The only way that worked for me is emitting documentId, and puting ids into the array on reduce.
On client side (I'm writing java program) I have to count just all distinct Ids.
So, while doing map I do emit
emit(docCategories[i], {verbIds : [this.verbId.toNumber()], count: 1, sentenceCnt: catValues[i].sentenceCnt, sentiment: catValues[i].sentiment, totalCnt : totalCount});
Reduce function is the following:
var reduce = function(key, values) {
var res = {verbIds : [], count : 0, sentenceCnt : 0, sentiment : 0};
for ( var i = 0; i < values.length; i ++ ) {
// res.verbIds = res.verbIds.concat(values[i].verbIds); //works slow
for ( var j = 0; j < values[i].verbIds.length; j ++ ) {
res.verbIds.push(values[i].verbIds[j]);
}
res.count += values[i].count;
res.sentenceCnt += values[i].sentenceCnt;
res.sentiment += values[i].sentiment;
}
return res;
};
Java side program just count distinct Ids over all of the results.
Actually for 1.1M documents execution slows down significantly

Removing documents while preserving at least one

I have a MongoDB collection containing history data with id and timestamp.
I want to delete data from the collection older than a specific
timestamp. But for every id at least one
document (the newest) must stay in the collection.
Suppose I have the following documents in my collection ...
{"id" : "11", "timestamp" : ISODate("2011-09-09T10:27:34.785Z")} //1
{"id" : "11", "timestamp" : ISODate("2011-09-08T10:27:34.785Z")} //2
{"id" : "22", "timestamp" : ISODate("2011-09-05T10:27:34.785Z")} //3
{"id" : "22", "timestamp" : ISODate("2011-09-01T10:27:34.785Z")} //4
... and I want to delete documents having a timestamp older than
2011-09-07 then
1 and 2 should not be deleted because they are newer.
4 should be deleted because it is older, but 3 should not be deleted
(although it is older) because
at least one document per id should stay in the collection.
Does anyone know how I can do this with casbah and/or on the mongo
console?
Regards,
Christian
I can think of a couple of ways. First, try this:
var cutoff = new ISODate("2011-09-07T00:00:00.000Z");
db.testdata.find().forEach(function(data) {
if (data.timestamp.valueOf() < cutoff.valueOf()) {
// A candidate for deletion
if (db.testdata.find({"id": data.id, "timestamp": { $gt: data.timestamp }}).count() > 0) {
db.testdata.remove({"_id" : data._id});
}
}
});
This does the job you want. Or you can use a MapReduce job to do it as well. Load this into a text file:
var map = function() {
emit(this.id, {
ref: this._id,
timestamp: this.timestamp
});
};
var reduce = function(key, values) {
var cutoff = new ISODate("2011-09-07T00:00:00.000Z");
var newest = null;
var ref = null;
var i;
for (i = 0; i < values.length; ++i) {
if (values[i].timestamp.valueOf() < cutoff.valueOf()) {
// falls into the delete range
if (ref == null) {
ref = values[i].ref;
newest = values[i].timestamp;
} else if (values[i].timestamp.valueOf() > newest.valueOf()) {
// This one is newer than the one we are currently saving.
// delete ref
db.testdata.remove({_id : ref});
ref = values[i].ref;
newest = values[i].timestamp;
} else {
// This one is older
// delete values[i].ref
db.testdata.remove({_id : values[i].ref});
}
} else if (ref == null) {
ref = values[i].ref;
newest = values[i].timestamp;
}
}
return { ref: ref, timestamp: newest };
};
Load the above file into the shell: load("file.js");
Then run it: db.testdata.mapReduce(map, reduce, {out: "results"});
Then remove the mapReduce output: db.results.drop();