SQL to MapReduce: where clause with a select statement

SQL to MapReduce: where clause with a select statement - mongodb

How could i translate an sql query to mongodb map reduce when the where clause constains a select statement?
For example
select
sum(l_extendedprice) / 7.0 as avg_yearly
from
lineitem,
part
where
p_partkey = l_partkey
and p_brand = 'Brand#23'
and p_container = 'MED BOX'
and l_quantity < (
select
0.2 * avg(l_quantity)
from
lineitem
where
l_partkey = p_partkey
);
I've tried this mapreduce, but seemingly it did not work.
db.runCommand({
mapreduce: "lineitem",
query: {
"partsupp.ps_partkey.p_brand": "Brand#23",
"partsupp.ps_partkey.p_container": "MED BOX"
},
map: function() {
var data = {l_extendedprice: 0, l_quantity:0, total_l_quantity: 0 };
data.l_extendedprice = this.l_extendedprice;
data.l_quantity = this.l_quantity;
data.total_l_quantity = 1;
emit("avg_yearly", data);
},
reduce: function(key, values) {
var data = {l_extendedprice: 0, l_quantity:0, total_l_quantity: 0 };
var sum_l_quantity = 0;
/*sum the l_quantity and total_l_quantity*/
for (var i = 0; i < values.length; i++) {
sum_l_quantity += values[i].l_quantity;
data.total_l_quantity += values[i].total_l_quantity;
}
/*calculate the average l_quantity and multiply*/
var avg_l_quantity = 0.2 * (sum_l_quantity / data.total_l_quantity);
/*sum l_extendedprice and divide */
for (var i = 0; i < values.length; i++) {
if( values[i].l_quantity < avg_l_quantity ) {
data.l_extendedprice += values[i].l_extendedprice;
}
}
data.l_extendedprice = data.l_extendedprice / 7.0;
return data;
},
out: 'query017'
});
Has another way to do it? Is it possible to do it in the query clause of mapreduce? Or i only had mistaked in my code?
The schema
{
"_id" : ObjectId("511b7d1b3daee1b1446ecdfe"),
"l_quantity" : 17,
"l_extendedprice" : 21168.23,
"l_discount" : 0.04,
"l_shipdate" : ISODate("1996-03-13T03:00:00Z"),
"l_commitdate" : ISODate("1996-02-12T03:00:00Z"),
"l_receiptdate" : ISODate("1996-03-22T03:00:00Z"),
"l_shipmode" : "TRUCK",
"l_comment" : "blithely regular ideas caj",
"partsupp" : {
"ps_availqty" : 6157,
"ps_supplycost" : 719.17,
"ps_partkey" : {
"p_partkey" : NumberLong(155190),
"p_name" : "slate lavender tan lime lawn",
"p_mfgr" : "Manufacturer#4",
"p_brand" : "Brand#44",
"p_type" : "PROMO BRUSHED NICKEL",
"p_size" : 9,
"p_container" : "JUMBO JAR",
"p_retailprice" : 1245.19,
"p_comment" : "regular, final dol"
}
}
}

Related

how call synchronous mongo query in node js

I have to mongodb collection.
first i will call to coll1 and get ids.Then I want to call coll2 and search by ids and some other fields.
when I am calling to funtion it is returing undefined.
how I can wait untill i m not get result from funtion.
coll1 docs sample:
{
"_id" : ObjectId(""),
"container_id" : "56867887fdb391ff09d15e9",
"item_state" : [
{
"user_id" : 1,
"username" : "x",
"state" : "1"
},
{
"user_id" : 2,
"username" : "y",
"state" : "3"
}
],
"name" : "members test"
}
enter code here
function listMyWorkitems(user_id,callback) {
var user_id=1;
var workItemList = new Array();
db.collection('containers').find({'start_date':{"$lt":new Date(2017,02,11)}}).toArray(function(err,docs){
console.log("doc length");
console.log(docs.length);
for (var i = 0; i < docs.length; i++) {
db.collection('work_items').find({"$and":[{'container_id':docs[i]._id.toString()},{'item_state':{"$elemMatch":{'user_id':user_id,'is_active':1,'state':{"$in":["1","2","3"]}}}}]}).toArray(function(err,workDocs){
//console.log(workDocs);
for (var i = 0; i < workDocs.length; i++) {
for (var j = 0; j < workDocs[i].item_state.length; j++) {
var doc=workDocs[i].item_state[j]
workItemList.push(workDocs[i].name)
}
}
});
}
callback(workItemList);
});
}
listMyWorkitems(user_id,funtion(err,workItemList) {
console.log(workItemList)
});
I understand async auto concept but still it is returning empty list
here is code what i tried till now.. i done for collection 1 but once it is solve then i can query to collection 2 also..
var async = require('async');
var mongojs = require("mongojs");
var db = mongojs("localhost/mc_dev");
async.auto({
coll1task: function(callback) {
var idlist =[];
console.log("ids fetch from collection 1");
db.collection('containers').find({'start_date':{"$lt":new Date(2017,02,11)}}).toArray(function(err,docs){
docs.forEach(function(doc){
console.log(doc._id);
idlist.push(doc._id);
});});
callback(null,idlist);
},
finalcontrol: [
'coll1task',
function(results,callback) {
console.log(results.coll1task);
}
],
},
function(error, results) {
console.log('error = ', error)
console.log('results = ', results)
})

The best approach to do things asynchronously in node.js is by using modules like ASYNC or PROMISE.
You can visit async and get access to all the modules provided by this library.
Main modules provided by async library are
async.series=> this is what you can use in your case.
2.async.parallel
async.auto => I will suggest you to use this as it provide you to perform operations asynchronously as well as synchronously
Further more you can also use PROMISES as now they are the part of ECMA SCRIPT 6 there are various modules also which you can use to get this done.

How do query among many collections then write result to corresponding collection

If I want to query users' age > 18,
and export result to corresponding collection,
How could I do it by rewriteing the following script?
The following is psuedo code
source_collections = ["user_1", "user_2", ..., "user_999"]
output_collections = ["result_1", "result_2", ..., "result_999"]
pipeline = [
{
"$match":{"age": > 18}
}
{ "$out" : output_collections }
]
cur = db[source_collections].runCommand('aggregate',
{pipeline: pipeline,allowDiskUse: true})

The script you're looking for is something like:
var prefix_source = 'user_';
var prefix_output = 'result_';
var source_collections = [];
var output_collections = [];
var numCollections = 999;
for (var i = 1; i <= numCollections; i++) {
source_collections.push(prefix_source + i);
output_collections.push(prefix_output + i);
}
var pipeline = [{'$match': {age: {'$gt': 18}}}, {'$out': ''}]; for (var currentCollection = 0; currentCollection < source_collections.length; currentCollection++) {
pipeline[pipeline.length - 1]['$out'] = output_collections[currentCollection];
var cur = db[source_collections[currentCollection]].runCommand('aggregate', {pipeline: pipeline,allowDiskUse: true});
}
And while you're at it, the var cur = ... line could be simplified to
db[source_collections[currentCollection]].aggregate(pipeline, {allowDiskUse: true});
Note: I've added a piece that generates your arrays for you, as I'm sure you're not looking to write them by hand :D

How to add or inc to an array in a update query?

If I have a doc which has an array which contains a items which represents counts for a day, perhaps like :-
{
data : [ {'20141102' : 2 },{'20141103' : 4 } ]
}
when I do an update, and I have a string '20141103' and then later a '20141104' I want to either inc the array entry or add a new array entry. Is this possible with an update?

Yes, it's feasible. I tried like this:
(run on mongo shell; both client and server are V2.6.4)
function tryAndFine(coll, key, value) {
var entry = {};
entry[key] = value;
var parent = 'data';
var prefix = parent + '.';
function incrementOnly() {
var criteria = {};
criteria[prefix + key] = {$exists : true};
var update = {};
update[prefix + "$." + key] = value;
var result = coll.update(criteria, {$inc : update});
// if increment fails, try to add a new one
if (result.nModified == 0) {
addNewElement();
}
}
function addNewElement() {
var criteria = {};
criteria[prefix + key] = {$exists : false};
var update = {};
update[parent] = entry;
var result = coll.update(criteria, {$push : update}, {upsert : true});
// if exists, try to increment the count
if (result.upserted == 0 && result.nModified == 0) {
incrementOnly();
}
}
// run entry
incrementOnly();
}
// test
var c = db.c;
c.drop();
tryAndFine(c, '20141103', 1);
tryAndFine(c, '20141103', 1);
tryAndFine(c, '20141104', 1);
tryAndFine(c, '20141105', 1);
tryAndFine(c, '20141104', 1);
// output
{
"_id" : ObjectId("54577e1a3502852bd4ad2395"),
"data" : [ {
"20141103" : 2
}, {
"20141104" : 2
}, {
"20141105" : 1
} ]
}

MongoDB map reduce count giving more results than a query

I have a collection users in Mongo and I execute this map reduce which I believe is the equivalent of a COUNT(*) GROUP BY origin:
> m = function() { for (i in this.membership) {
... emit( this.membership[i].platform_profile.origin, 1 );
... } }
function () {
for (i in this.membership) {
emit(this.membership[i].platform_profile.origin, 1);
}
}
> r = function( id, values ) { var result = 0;
... for ( var i = 0; i < values.length; i ++ ) { result += values[i]; }
... return result; }
function (id, values) {
var result = 0;
for (var i = 0; i < values.length; i++) {
result += values[i];
}
return result;
}
> db.users.mapReduce(m, r, {out : { inline: 1}});
{
"results" : [
{
"_id" : 0,
"value" : 15
},
{
"_id" : 1,
"value" : 449
},
...
}
But if I try to count how many documents have this field set to a specific value like 1, I get fewer results:
db.users.count({"membership.platform_profile.origin": 1});
424
What am I missing?

Are your count queries using a sparse index by any chance? My only guess there would be if some other query criteria resulted in documents absent from from index to be ignored from the count.
I recreated your schema with some fixture data and the results between map/reduce and simple count queries are in agreement:
db.users.drop();
var map = function() {
for (i in this.membership) {
emit(this.membership[i].platform_profile.origin, 1);
}
};
var reduce = function(id, values ) {
var result = 0;
for (var i = 0; i < values.length; i++) {
result += values[i];
}
return result;
}
var origins = {1: "a", 2: "b", 3: "c", 4: "d"};
for (var i = 0; i < 1000; ++i) {
var membership = [];
for (var o in origins) {
if (0 == i % o) {
membership.push({ platform_profile: { origin: origins[o] }});
}
}
db.users.save({ membership: membership });
}
db.users.mapReduce(map, reduce, {out: {inline: 1}}).results.forEach(function(result){
print(result["_id"] + ": " + result["value"]);
});
for (var o in origins) {
print(origins[o] + ": " + db.users.count({"membership.platform_profile.origin": origins[o]}));
}
Here's the output:
$ mongo --quiet mr_count.js
a: 1000
b: 500
c: 334
d: 250
a: 1000
b: 500
c: 334
d: 250

You can use the following map/reduce for the equivalent of COUNT(*) GROUP BY origin
Map/Reduce Functions :
map = function() {
if(!this.membership) return;
for (i in this.membership) {
if(!this.membership[i].platform_profile || !this.membership[i].platform_profile.origin) return;
emit(this.membership[i].platform_profile.origin, 1);
}
}
reduce = function(key, values) {
var count = 0;
for (v in values) {
count += values[v];
}
return count;
}
result = db.runCommand({
"mapreduce" : "users",
"map" : map,
"reduce" : reduce,
"out" : "users_count"
});

I had the same issue. I replaced x.length by Array.sum(x) in the reduce function (assuming you emit 1 in the map function) and it works. I agree x.length should work too, but I cannot explain why it does not.

mongodb map emit keys from subdocument

I have a document which includes a subdocument:
{
"_id" : ObjectId("XXXXX"),
"SearchKey" : "1234",
"SearchTerms" : {
"STA" : ["1"],
"STB" : ["asdfasdf"],
"STC" : ["another"]
}
}
The SearchTerm elements are not fixed - sometimes we'll have STA without STC, for example.
I can do this:
var map = function() {
for (key in this.SearchTerms)
{
emit(key, 1);
}
}
but I can't do this:
var map = function() {
for (var i=0; i< this.SearchTerms.length; i++)
{
emit(this.SearchTerms[i], 1)
}
}
because the latter doesn't produce any results after the reduce. Why not?
As an aside - what I need to do is count the cross-product of the search terms over all documents, that is, find the incidence of (STA and STB) and (STA and STC) and (STB and STC) in the case above. If someone knows how to do that right away, that works even better.
As always, thanks for the help

The key that you emit should be a composite of both keys.
var map = function() {
if(!this.SearchTerms) return;
for(var i = 0 ; i < this.SearchTerms.length; i++){
var outerKey = this.SearchTerms[i];
for(var j = i + 1; j < this.SearchTerms.length; j++){
var innerKey = this.SearchTerms[j];
// assuming you don't care about counting (STA and STC) separately from (STC and STA),
// then order doesn't matter, lets make sure both occurences have the same key.
var compositeKey = (outerKey < innerKey) ? outerKey+"_"+innerKey : innerKey+"_"+outerKey;
emit(compositeKey, 1);
}
}
}

This is because this.SearchTerms is a dictionary/subdocument and not an array. this.SearchTerms[0] doesn't exist.
For the second question: something like this should work:
for (key1 in this.SearchTerms)
{
for (key2 in this.SearchTerms)
{
if (key1 < key2)
{
key = [key1, key2];
emit(key, 1);
}
}
}

We Keep Coding

iphone swift flutter scala powershell matlab mongodb postgresql perl eclipse

SQL to MapReduce: where clause with a select statement - mongodb

Related

how call synchronous mongo query in node js

How do query among many collections then write result to corresponding collection

How to add or inc to an array in a update query?

MongoDB map reduce count giving more results than a query

mongodb map emit keys from subdocument

Categories

Resources