Map/Reduce with the MongoDB Node.js native driver

I'm working with the MongoDB native driver, using a map/reduce job. Basically, I have a mediaId as the key and want to count how many media items were loaded and started per mediaId.
So what I've done is:
var map = function() {
    emit(this.media.id, {
        count: 1,
        played: 0,
        ph: this.project.id,
        title: this.media.title,
        media: this.media.id,
        origin: this.origin,
        thumbnail: this.media.thumbnail,
        mediaDuration: this.media.mediaDuration,
        state: this.state
    });
};
var reduce = function(k, vals) {
    var result = {
        count: 0,
        played: 0,
        ph: '',
        title: '',
        media: '',
        origin: '',
        thumbnail: '',
        mediaDuration: 0,
        state: ''
    };
    vals.forEach(function(doc) {
        result.count += doc.count;
        result.ph = doc.ph;
        result.title = doc.title;
        result.media = doc.media;
        result.thumbnail = doc.thumbnail;
        result.mediaDuration = doc.mediaDuration;
        result.state = doc.state;
        result.origin = doc.origin;
        if (doc.state === "started") {
            result.played += 1;
        }
    });
    return result;
};
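For reference, this is roughly how I run the job with the native driver (a sketch; the events source collection and the tmp output collection are placeholder names):
db.collection('events').mapReduce(map, reduce, { out: { replace: 'tmp' } }, function(err, outCollection) {
    if (err) throw err;
    // the callback hands back the output collection, which I read with toArray
    outCollection.find().toArray(function(err, results) {
        if (err) throw err;
        console.log(results);
    });
});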
In my test collection I have 2 different mediaIds: one with 553 objects and another with just 1 object. I've put them all in the "started" state to test this, so count should equal played.
When I run the map/reduce and read the result back (I used the driver's toArray function), it returns:
[ { _id: '12398asdsa9802193810asd120',
    value:
     { count: 1,
       played: 0,
       ph: '123213ased12231',
       title: 'xxxxxxxxxxxxxxxxxxxxxxxxxxx',
       media: '1xxxxxxxxxxxxxxxxxxxxxxxxxxx1',
       origin: 'http://www.google.com',
       thumbnail: 'http://cache.ohinternet.com/images/0/0e/Forever_Alone.png',
       mediaDuration: 12321321,
       state: 'started' } },
  { _id: '2c9f94b42f5b5114012f5b92ea430066',
    value:
     { count: 553,
       played: 155,
       ph: '316',
       title: 'xxxxxxxxxxxxxxxxxxxxxxxxxxx',
       media: '2xxxxxxxxxxxxxxxxxxxxxxxxxxx2',
       origin: 'http://localhost:9000/views/index.html',
       thumbnail: null,
       mediaDuration: null,
       state: 'started' } } ]
It seems that when a key has just one emitted value, the reduce function isn't called (I did some tests with another collection with more than 100 mediaIds, and the behavior was identical). Does anyone have an idea of what is wrong here?
Thanks A LOT for your time,
Cheers.

I sort of solved the "issue".
It turns out MongoDB never calls the reduce function for a key that has only a single emitted value, so any per-document logic has to happen in map. I therefore did the filtering in the map function and not in the reduce function. Something like this:
var map = function() {
    var played = (this.state === "started") ? 1 : 0;
    emit(this.media.id, {
        count: 1,
        played: played,
        ph: this.project.id,
        title: this.media.title,
        media: this.media.id,
        origin: this.origin,
        thumbnail: this.media.thumbnail,
        mediaDuration: this.media.mediaDuration,
        state: this.state
    });
};
Hope it helps anyone who is having the same "problem".
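For what it's worth, the same counts can also be produced with the aggregation framework, which sidesteps the reduce subtlety entirely. A minimal sketch (the events collection name is a placeholder; field names are taken from the map function above):
db.collection('events').aggregate([
    { $group: {
        _id: '$media.id',
        count: { $sum: 1 },
        // count a play only for documents in the "started" state
        played: { $sum: { $cond: [ { $eq: ['$state', 'started'] }, 1, 0 ] } },
        title: { $first: '$media.title' },
        origin: { $first: '$origin' }
    } }
]).toArray(function(err, results) {
    if (err) throw err;
    console.log(results);
});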

Related

MongoDB query with 300k documents takes more than 30 seconds

OK, as said in the title, I have a "performance issue": I need to get all documents from a collection, but it takes too long. The players collection contains around 300k small documents, and the query in the service goes like this:
async getAllPlayers() {
    const players = await this.playersCollection.find({}, {
        projection: { playerId: 1, name: 1, surname: 1, shirtNumber: 1, position: 1 }
    }).toArray();
    return players;
}
The overall result size is 6.4 MB. I'm using the Fastify adapter, fastify-compress, and the MongoDB native driver. If I remove the projection, it takes almost a minute.
Any idea how to improve this?
The best time I get is 8 seconds, where fast-json-stringify gives me more than a 10-second boost over 300k records:
'use strict'

// run fresh mongo
// docker run --name temp --rm -p 27017:27017 mongo

const fastify = require('fastify')({ logger: true })
const fjs = require('fast-json-stringify')

const toString = fjs({
  type: 'object',
  properties: {
    playerId: { type: 'integer' },
    name: { type: 'string' },
    surname: { type: 'string' },
    shirtNumber: { type: 'integer' },
  }
})

fastify.register(require('fastify-mongodb'), {
  forceClose: true,
  url: 'mongodb://localhost/mydb'
})

fastify.get('/', (request, reply) => {
  const dataStream = fastify.mongo.db.collection('foo')
    .find({}, {
      limit: 300000,
      projection: { playerId: 1, name: 1, surname: 1, shirtNumber: 1, position: 1 }
    })
    .stream({
      transform(doc) {
        return toString(doc) + '\n'
      }
    })

  reply.type('application/jsonl')
  reply.send(dataStream)
})

fastify.get('/insert', async (request, reply) => {
  const collection = fastify.mongo.db.collection('foo')
  const batch = collection.initializeOrderedBulkOp()
  for (let i = 0; i < 300000; i++) {
    const player = {
      playerId: i,
      name: `Name ${i}`,
      surname: `surname ${i}`,
      shirtNumber: i
    }
    batch.insert(player)
  }
  const { result } = await batch.execute()
  return result
})

fastify.listen(8080)
In any case, you should consider:
paginating your output (see the sketch below)
or pushing the data into a bucket (like S3) and returning a URL to the client to download the file directly; this will speed up the process a lot and save your Node.js process from streaming all that data
Note that compression is a heavy process in Node.js, so it slows the response down a lot. An nginx proxy adds it by default, without the need to implement it in your business-logic server.
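For the pagination option, a range-based approach on an indexed field scales better than skip/limit, because skip still has to walk the skipped documents. A rough sketch (it assumes playerId is indexed; the route and query-parameter names are made up for the example):
fastify.get('/players', async (request) => {
  const after = Number(request.query.after) || 0
  const limit = Math.min(Number(request.query.limit) || 1000, 10000)
  // return the next `limit` players after the last playerId the client saw
  const page = await fastify.mongo.db.collection('foo')
    .find({ playerId: { $gt: after } }, {
      sort: { playerId: 1 },
      limit,
      projection: { playerId: 1, name: 1, surname: 1, shirtNumber: 1, position: 1 }
    })
    .toArray()
  return page
})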

Variables exporting error in MongoDB

Question
I have provided my code below for reference. I'm using MongoDB and discord.js v12. Basically, I have made a !info command which shows some general info about the user.
What this code does is check through the member's roles and, depending on which roles they have, calculate their total claim time (for giveaways etc.). The problem is with the donator role: I can't figure out why I can't use the donates variable outside the db.findOne block. Here, data.content.length is the user's total number of donations, so donates * 5 means +5 claim time for each donation.
My Code
const moment = require('moment');

module.exports = {
    name: 'info',
    async execute(client, message, args, Discord) {
        const member = message.mentions.members.first() || message.guild.members.cache.get(args[0]) || message.member;
        const db = require('../models/d-schema');
        db.findOne({ guildid: message.guild.id, user: member.user.id }, async (err, data) => {
            if (err) throw err;
            if (data) {
                const donates = parseInt(data.content.length);
            }
        })
        var DefaultTime = 10;
        var support = 0;
        var donate = 0;
        var boost = 0;
        const userRoles = member.roles.cache.map((r) => r.name);
        if (userRoles.includes("୨・supporter")) {
            support = 3;
        }
        if (userRoles.includes("୨・donator")) {
            donate = donates * 5;
        }
        if (userRoles.includes("୨・booster")) {
            boost = 10;
        }
        const TotalTime = DefaultTime + support + donate + boost;
        const embed = new Discord.MessageEmbed()
            .setThumbnail(member.user.displayAvatarURL({ dynamic: true }))
            .addFields(
                { name: member.user.tag, value: member.user, inline: true },
                { name: 'Nickname', value: `${member.nickname !== null ? member.nickname : 'None'}`, inline: true },
                { name: 'Is Bot', value: member.user.bot, inline: true },
                { name: 'Joined', value: `${moment.utc(member.joinedAt).format("MMMM Do YYYY")}`, inline: true },
                { name: 'Created', value: `${moment.utc(member.user.createdAt).format("MMMM Do YYYY")}`, inline: true },
                { name: 'Claim Time', value: `${TotalTime} seconds`, inline: true },
            )
            .setFooter(`ID : ${member.user.id}`)
            .setTimestamp()
            .setColor('00ffcc')
        message.channel.send(embed)
    }
}
You cannot use the donates variable because you are declaring it inside the db.findOne() block. This is called variable scope; for a better understanding you can read this answer.
If you want to use it outside of that block, you have to declare it beforehand, like this:
let donates;
db.findOne({ guildid: message.guild.id, user: member.user.id }, async (err, data) => {
    if (err) throw err;
    if (data) {
        donates = parseInt(data.content.length);
    }
})
Now you're able to use that variable outside of the db.findOne() block :) Note, though, that the callback runs asynchronously, so the code after it may execute before donates has been assigned; the alternative in the edit below avoids this.
Edit:
Alternative way:
It would be easier to use the function asynchronously. That way, everything can be scoped in the same block!
Example:
These two methods will give the same results:
const data = await Model.findOne({ ... });
console.log(data);

Model.findOne({ ... }, (err, data) => {
    console.log(data);
});
Suggestion from Lioness100
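Applied to the command above, the await version would look something like this (a sketch; it assumes the d-schema model supports the promise form of findOne, as Mongoose models do):
// inside: async execute(client, message, args, Discord) { ... }
const db = require('../models/d-schema');
const data = await db.findOne({ guildid: message.guild.id, user: member.user.id });
const donates = data ? parseInt(data.content.length) : 0;
// donates is now in scope for the role checks that follow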

Why is my page not rendering EnhancedGrid

Good day to all. While studying Dojo, I ran into a problem: my EnhancedGrid is not drawn on the page. This error appears in the browser console:
dojo.js.uncompressed.js:1321 Uncaught TypeError: Cannot read property 'get' of null
    at Object.getFeatures (ObjectStore.js.uncompressed.js:241)
    at Object._setStore (DataGrid.js.uncompressed.js:14511)
    at Object.advice (dojo.js.uncompressed.js:8428)
    at Object.c [as _setStore] (dojo.js.uncompressed.js:8408)
    at Object.postCreate (DataGrid.js.uncompressed.js:14351)
    at Object.l (dojo.js.uncompressed.js:10753)
    at Object.postCreate (EnhancedGrid.js.uncompressed.js:90)
    at Object.create (DataGrid.js.uncompressed.js:4330)
    at Object.postscript (DataGrid.js.uncompressed.js:4243)
    at new <anonymous> (dojo.js.uncompressed.js:10950)
The grid-drawing script looks like this:
var blogStore;

/**
 * Creates Dojo Store.
 */
require(["dojo/store/JsonRest",
    "dojo/data/ObjectStore"
], function (JsonRest, ObjectStore) {
    blogJsonStore = new JsonRest({
        handleAs: 'json',
        target: 'http://localhost:8080/myservice'
    });
    var data = {
        identifier: 'id',
        items: []
    };
    blogJsonStore.query({
        start: 0,
        count: 10
    }).then(function (results) {
        var res = [];
        res = results;
        if (0 === res.length) {
            data.items.push("There are no entries in this blog. Create a post!!!")
        } else {
            data.items.push(results)
        }
    });
    blogStore = new ObjectStore({ data: data });
});
/**
 * Creates Dojo EnhancedGrid.
 */
require(["dojox/grid/EnhancedGrid",
    "dojox/grid/enhanced/plugins/Filter",
    "dojox/grid/enhanced/plugins/NestedSorting",
    "dojox/grid/enhanced/plugins/Pagination",
    "dojo/domReady!"
], function (EnhancedGrid) {
    Grid = new EnhancedGrid({
        id: 'grid',
        store: blogStore,
        structure: [
            { name: 'Message', field: 'text', datatype: 'string',
              width: 'auto', autoComplete: true }
        ],
        rowsPerPage: 5,
        rowSelector: "20px",
        selectionMode: "single",
        plugins: {
            nestedSorting: true,
            pagination: {
                description: true,
                pageStepper: true,
                sizeSwitch: true,
                pageSizes: ["5", "10", "15", "All"],
                maxPageStep: 4,
                position: "bottom"
            }
        }
    });
    Grid.placeAt('resultDiv');
    Grid.startup();
});
If I remove the block under "Creates Dojo Store.", the grid renders normally.
Help me solve the problem. Thank you in advance for any help.

Meteor Mongo Collections find forEach cursor iteration and saving to Elasticsearch problem

I have a Meteor app which is connected to MongoDB.
In Mongo I have a collection which has ~700k records.
I have a cron job each week, where I read all the records from the collection (using a Mongo cursor) and, in batches of 10k, insert them into Elasticsearch so they are indexed.
let articles = []
Collections.Articles.find({}).forEach(function(doc) {
    articles.push({
        index: { _index: 'main', _type: 'article', _id: doc.id }
    },
    doc);
    if (0 === articles.length % 10000) {
        client.bulk({ maxRetries: 5, index: 'main', type: 'article', body: articles })
        data = []
    }
})
Since forEach is synchronous and goes over each record before continuing, while client.bulk is async, this overloads the Elasticsearch server and it crashes with an out-of-memory exception.
Is there a way to pause the forEach while the insert is being done? I tried async/await, but that does not seem to work either.
let articles = []
Collections.Articles.find({}).forEach(async function(doc) {
    articles.push({
        index: { _index: 'main', _type: 'article', _id: doc.id }
    },
    doc);
    if (0 === articles.length % 10000) {
        await client.bulk({ maxRetries: 5, index: 'main', type: 'article', body: articles })
        data = []
    }
})
Any idea how to achieve this?
EDIT: Here is what I am trying to achieve, expressed with promises:
let articles = []
Collections.Articles.find({}).forEach(function(doc) {
    articles.push({
        index: { _index: 'main', _type: 'article', _id: doc.id }
    },
    doc);
    if (0 === articles.length % 10000) {
        // Pause FETCHING rows with forEach
        client.bulk({ maxRetries: 5, index: 'main', type: 'article', body: articles }).then(() => {
            console.log('inserted')
            // RESUME FETCHING rows with forEach
            console.log("RESUME READING");
        })
        data = []
    }
})
Managed to get this working with ES2018 async iteration.
I got the idea from Using async/await with a forEach loop.
Here is the code that is working:
let articles = []
let cursor = Collections.Articles.find({})
for await (const doc of cursor) {
    articles.push({
        index: { _index: 'main', _type: 'article', _id: doc.id }
    },
    doc);
    if (articles.length === 10000) {
        await client.bulk({ maxRetries: 5, index: 'main', type: 'article', body: articles })
        articles = []
    }
}
This works correctly and manages to insert all the records into Elasticsearch without crashing.
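One detail worth noting: if the total number of records is not a multiple of 10,000, the loop above finishes with a partial batch still sitting in articles, so it needs a final flush after the loop (a small sketch):
// flush the remaining partial batch once the cursor is exhausted
if (articles.length > 0) {
    await client.bulk({ maxRetries: 5, index: 'main', type: 'article', body: articles })
}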
If you are concerned about the unthrottled iteration, you may use the internal Meteor._sleepForMs method, which allows you to put an async timeout in your sync-styled code:
Collections.Articles.find().forEach((doc, index) => {
    console.log(index, doc._id)
    Meteor._sleepForMs(timeout)
})
Now this works fine within the Meteor environment (Meteor.startup, Meteor.methods, Meteor.publish).
Your cron is likely not within this environment (= a Fiber), so you may write a wrapper that binds the environment:
const bound = fct => Meteor.bindEnvironment(fct)

const iterateSlow = bound(function (timeout) {
    Collections.Articles.find().forEach((doc, index) => {
        console.log(index, doc._id)
        Meteor._sleepForMs(timeout)
    })
    return true
})

iterateSlow(50) // iterates with 50ms timeout
Here is a complete minimal example that you can reproduce with a fresh project:
// create a minimal collection
const MyDocs = new Mongo.Collection('myDocs')

// fill the collection
Meteor.startup(() => {
    for (let i = 0; i < 100; i++) {
        MyDocs.insert({})
    }
})

// bind helper
const bound = fct => Meteor.bindEnvironment(fct)

// iterate docs with interval between
const iterateSlow = bound(function (timeout) {
    MyDocs.find().forEach((doc, index) => {
        console.log(index, doc._id)
        Meteor._sleepForMs(timeout)
    })
    return true
})

// simulate external environment, like when cron runs
setTimeout(() => {
    iterateSlow(50)
}, 2000)

OrientJS: How to get standard JSON (serialized) from query

I don't understand how to get standard JSON back from an OrientJS query. I see people talking about "serializing" the result, but I don't understand why or how to do that. There is a toJSON() method, but I only see it being used with fetch plans etc.
I am trying to pipe a stream to a CSV file, and it isn't working properly because of the incorrect JSON format.
I would love an explanation of how and when to serialize. :-)
My Query:
return db.query(
    `SELECT
        id,
        name,
        out('posted_to').name as page,
        out('posted_to').id as page_id,
        out('posted_to').out('is_language').name as language,
        out('posted_to').out('is_network').name as network
    FROM post
    WHERE posted_at
        BETWEEN '${since}' AND '${until}'
    UNWIND
        page,
        page_id,
        language,
        network
    `
);
My Result:
[ { '#type': 'd',
    id: '207109605968597_1053732754639607',
    name: '10 maneiras pelas quais você está ferindo seus relacionamentos',
    page: 'Eu Amo o Meu Irmão',
    page_id: '207109605968597',
    language: 'portuguese',
    network: 'facebook',
    '#rid': { [String: '#-2:1'] cluster: -2, position: 1 },
    '#version': 0 },
  { '#type': 'd',
    id: '268487636604575_822548567865143',
    name: '10 maneiras pelas quais você está ferindo seus relacionamentos',
    page: 'Amo meus Filhos',
    page_id: '268487636604575',
    language: 'portuguese',
    network: 'facebook',
    '#rid': { [String: '#-2:3'] cluster: -2, position: 3 },
    '#version': 0 } ]
Here is an example with my dataset:
Query:
db.select('id','code').from('tablename').where({ deleted: true }).all()
    .then(function (vertex) {
        console.log('Vertexes found: ');
        console.log(vertex);
    });
Output:
Vertexes found:
[ { '#type': 'd',
    id: '6256650b-f5f2-4b55-ab79-489e8069b474',
    code: '4b7d99fa-16ed-4fdb-9baf-b33771c37cf4',
    '#rid': { [String: '#-2:0'] cluster: -2, position: 0 },
    '#version': 0 },
  { '#type': 'd',
    id: '2751c2a0-6b95-44c8-966a-4af7e240752b',
    code: '50356d95-7fe7-41b6-b7d9-53abb8ad3e6d',
    '#rid': { [String: '#-2:1'] cluster: -2, position: 1 },
    '#version': 0 } ]
If I add JSON.stringify():
Query:
db.select('id','code').from('tablename').where({ deleted: true }).all()
    .then(function (vertex) {
        console.log('Vertexes found: ');
        console.log(JSON.stringify(vertex));
    });
Output:
Vertexes found:
[{"#type":"d","id":"6256650b-f5f2-4b55-ab79-489e8069b474","code":"4b7d99fa-16ed-4fdb-9baf-b33771c37cf4","#rid":"#-2:0","#version":0},{"#type":"d","id":"2751c2a0-6b95-44c8-966a-4af7e240752b","code":"50356d95-7fe7-41b6-b7d9-53abb8ad3e6d","#rid":"#-2:1","#version":0}]
Hope it helps
I found a way that worked for me. Instead of using:
db.query()
I used an HTTP request in Node to query the database. The OrientDB documentation also says the REST API returns results in JSON format, so querying the database this way always gives you valid JSON.
To make the HTTP request I used the request module.
This is a sample that worked for me:
var request = require("request");

var auth = "Basic " + Buffer.from("root" + ":" + "root").toString("base64");

request(
    {
        url: encodeURI('http://localhost:2480/query/tech_graph/sql/' + queryInput + '/20'),
        headers: {
            "Authorization": auth
        }
    },
    function (error, response, body) {
        console.log(body);
        return body;
    }
);
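The request package has since been deprecated, so in a new project the same call could be made with Node's built-in http module, something like this (a sketch using the same URL and credentials as above):
var http = require('http');

var auth = 'Basic ' + Buffer.from('root:root').toString('base64');

http.get({
    host: 'localhost',
    port: 2480,
    path: encodeURI('/query/tech_graph/sql/' + queryInput + '/20'),
    headers: { 'Authorization': auth }
}, function (res) {
    var body = '';
    res.on('data', function (chunk) { body += chunk; });
    res.on('end', function () { console.log(body); });
});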