Load a huge JSON file using pg-promise helpers and an fs stream.
I'm using pg-promise and I want to make massive inserts into a table using pgp.helpers. I've seen solutions like Multi-row insert with pg-promise and have also followed Data import for streams (Spex), but it still fails with the same error as in this post: https://github.com/vitaly-t/spex/issues/8
I tried using the example from the other post with a CSV stream (rs.csv()), but when I replaced it with the JSONStream parser I still get the same error.
Can you please share a working example?
db.tx(t => {
    return streamRead.call(t, stream.pipe(parser), receiver)
})
There might be a better way to do it, but the code below sure works!
I set the chunk size (row.length) to 20,000 rows per insert statement; you can adjust it based on your needs.
// stream and parser are assumed to be set up elsewhere, e.g.
//   const stream = fs.createReadStream(...) and const parser = JSONStream.parse('*');
// db, pgp and cs (the pgp.helpers.ColumnSet) are also assumed to be configured already.
let row = [];

stream.pipe(parser)

parser.on('data', data => {
    row.push(data)
    if (row.length === 20000) {
        parser.pause() // stop reading while this batch is inserted
        db.tx('inserting-products', t => {
            const insert = pgp.helpers.insert(row, cs)
            return t.none(insert) // return the promise so the transaction waits for it
        }).then(() => {
            row = []
            parser.resume() // continue streaming once the batch is committed
        })
    }
})

parser.on('end', () => {
    if (row.length !== 0) {
        // flush the last partial batch
        db.tx('inserting-products', t => {
            const insert = pgp.helpers.insert(row, cs)
            return t.none(insert)
        }).then(() => {
            console.log('success')
            db.$pool.end()
        })
    } else {
        console.log('success')
        db.$pool.end()
    }
})
Please let me know in the comments if this helps, or if there are other ways to improve the process.
I am trying to update multiple columns within a table using Knex.js
I have tried looking at suggestions in the Knex docs as well as online, but so far have not found any working solution.
My coding attempt:
const userUpdateHandler = (req, res, database) => {
    const { id } = req.params;
    const { name, age } = req.body.formInput
    database('users')
        .where({ id })
        .update({ name }).update({ age })
        .then(resp => {
            if (resp) {
                res.json("success")
            } else {
                res.status(400).json('Not found')
            }
        })
        .catch(err => res.status(400).json('error updating user'))
}
The above is something I've tried out of sheer desperation, but what I would like it to do is to update multiple columns at once.
Could you please advise me on the best approach?
That should work https://runkit.com/embed/6ulv4hy93fcj
knex('table').update({foo: 1}).update({bar: 2}).toSQL()
// generates SQL: update "table" set "foo" = ?, "bar" = ?
and so should:
database('users')
.where({ id })
.update({ name, age })
Check whether the queries you are generating are what you expect...
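For instance, here is a minimal sketch (with made-up values for id, name and age) that prints the SQL Knex generates before actually executing the update:
const query = database('users')
    .where({ id: 1 })
    .update({ name: 'Alice', age: 30 });

console.log(query.toString());
// e.g. update "users" set "name" = 'Alice', "age" = 30 where "id" = 1

query.then(rowsUpdated => console.log(rowsUpdated, 'row(s) updated'));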
I use PostgreSQL. I have a raw INSERT query and I want to ask 2 questions:
I want to know whether it succeeded or failed when executing it
If it succeeded, I want to get the new data I inserted
var querydb = `INSERT INTO chat_message VALUES (DEFAULT, '${text}',
    '${created_at}', '${room_id}', '${user_id}')`;
sequelize.query(querydb, { type: sequelize.QueryTypes.INSERT })
    .then(insertID => {
        resolve(insertID)
    })
    .catch(err => {
        reject(err)
    })
First of all, you shouldn't be using raw queries unless there is something you can't achieve with sequelize.js; otherwise there is little point in using the library.
Use sequelize.js as much as possible, so that you get its default functionality for free.
So this is how you can do the same thing the sequelize.js way:
ChatMessage.create({
    text: text,
    room_id: room_id,
    user_id: user_id,
    created_at: new Date() // not required, it will be auto-managed by sequelize
}).then(message => {
    console.log(message);
    // you can now access the newly created ChatMessage via the variable message
}).catch(err => {
    // catch the error if anything goes wrong
})
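For reference, here is a rough sketch of the kind of model definition the create() call above assumes; the ChatMessage model isn't shown in the question, so the field types and options below are guesses you should adjust to your actual chat_message table:
// sequelize / Sequelize are assumed to come from your existing setup
const ChatMessage = sequelize.define('ChatMessage', {
    text: Sequelize.TEXT,
    room_id: Sequelize.INTEGER,
    user_id: Sequelize.INTEGER
}, {
    tableName: 'chat_message',
    underscored: true // maps createdAt/updatedAt to created_at/updated_at
});
If the promise resolves, the insert succeeded and message contains the newly inserted row (including its generated id); if it fails, the error ends up in .catch, which answers both of your questions.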
I am a newbie with sails.js and MongoDB. Thanks for reading my question.
I've run into real trouble.
Situation
I have a User db, Report db, and Post db.
I want to get the Report data along with the populated post data whose parent is the Report data, and I also want to get the User data from the post data.
In other words, I want to get doubly nested data.
Report
  |_ post
      |_ user
And this is my findReports action in ReportController.js
findReports: function(req, res) {
    Report.find().populateAll().sort({createdAt: -1}).exec((er, reports) => {
        if (er) return res.negotiate(er)
        const rs = reports
        console.log('rs : ', rs)
        rs.forEach((report) => {
            if (report.post) {
                User.findOne({id: report.post.user}).exec((er, user) => {
                    report.post.user = user
                    console.log("before rs : ", rs)
                })
                console.log("report.post.user : ", report.post.user)
                console.log("after rs : ", rs)
            } else {
                report.comment.user = User.findOne({id: report.comment.user}).exec((er, user) => {
                    report.comment.user = user
                })
            }
        })
        console.log("final rs : ", rs)
        res.view('dashboard/reports/index', { reports: rs })
    })
},
When I run my code, the results are printed to my console in the following order:
1. "after rs : ..(blah).." - I don't want this...
2. "final rs : ..(blah).." - I don't want this...
3. "before rs : ..(blah).." - This is the rs I want! But by the time res.view("dashboard....", { reports: rs }) runs, rs still holds the result from step 2.
I think the User.findOne calls are deferred (asynchronous) and run independently of the rest of the logic.
I'm really stuck. Any advice would be really helpful.
Waterline, the ORM from Sails.js, doesn't support deep population. So instead of that ORM you must use another one, such as Offshore.
Example:
Report.find().populate("post.user").exec((err, data) => {
// ur code here
})
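As a rough sketch, assuming the deep populate above works in your setup, your whole action could look something like this (adjust names as needed):
findReports: function(req, res) {
    Report.find()
        .populate('post.user')
        .sort({createdAt: -1})
        .exec((er, reports) => {
            if (er) return res.negotiate(er)
            res.view('dashboard/reports/index', { reports: reports })
        })
},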
I was trying to get familiar with the WriteResult object in mongo, but I can't access any of its values. The docs say the number of documents inserted is stored in WriteResult.nInserted. Trying to access nInserted is crashing my server.
var readings = new Readings({
    val1: parseInt(Data[0]),
    val2: parseInt(Data[1]),
    val3: parseInt(Data[2]),
    val4: parseInt(Data[3]),
    val5: parseInt(Data[4]),
    val6: parseInt(Data[5]),
})
var result = readings.save(function (err, post){
    if (err) { return next(err) }
    res.status(201).json(readings)
})
if (result.nInserted > 0) {
    console.log('wrote to database')
} else {
    console.log('could not write to database')
}
I know the data is being written to the database. I see it in the mongo shell.
The save method on a model instance doesn't return anything when you pass it a callback. All results are reported via that callback, so you'd use something like this:
readings.save(function (err, doc, numberAffected){
    if (err) { return next(err) }
    if (numberAffected > 0) {
        console.log('updated an existing doc');
    } else {
        console.log('added a new doc');
    }
    res.status(201).json(doc)
})
Mongoose doesn't give you access to the full WriteResult, but as long as err is null you can rest assured the save succeeded and it's only a matter of whether an existing doc was updated or a new one was added. Because you're creating a new doc here, numberAffected will always be 0.
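If you prefer promises, a minimal sketch (assuming a Mongoose version where save() returns a promise when called without a callback) would be:
readings.save()
    .then(doc => res.status(201).json(doc))
    .catch(err => next(err))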
I am in the process of changing the schema for one of my MongoDB collections. (I had been storing dates as strings, and now my application stores them as ISODates; I need to go back and change all of the old records to use ISODates as well.) I think I know how to do this using an update, but since this operation will affect tens of thousands of records I'm hesitant to issue an operation that I'm not 100% sure will work. Is there any way to do a "dry run" of an update that will show me, for a small number of records, the original record and how it would be changed?
Edit: I ended up using the approach of adding a new field to each record, and then (after verifying that the data was right) renaming that field to match the original. It looked like this:
db.events.find({timestamp: {$type: 2}})
    .forEach(function (e) {
        e.newTimestamp = new ISODate(e.timestamp);
        db.events.save(e);
    })

db.events.update({},
    {$rename: {'newTimestamp': 'timestamp'}},
    {multi: true})
By the way, that method for converting the string times to ISODates was what ended up working. (I got the idea from this SO answer.)
My advice would be to add the ISODate as a new field. Once you've confirmed that everything looks good, you can then unset the string date.
Create a test environment with your database structure and copy a handful of records to it. Problem solved. Not the solution you were looking for, I'm sure, but I believe these are exactly the circumstances a 'test environment' should be used for.
Select the IDs of the particular records you would like to monitor and restrict the update with {_id: {$in: [<your monitored ids>]}}.
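For example, a hypothetical shell session (with placeholder ObjectIds) that restricts your $rename to a couple of hand-picked documents before running it against the whole collection:
var monitoredIds = [ObjectId("..."), ObjectId("...")]; // the few documents to monitor
db.events.update(
    {_id: {$in: monitoredIds}},
    {$rename: {'newTimestamp': 'timestamp'}},
    {multi: true})
db.events.find({_id: {$in: monitoredIds}}) // inspect the result before the full run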
Another option, depending on how much overhead it will cause you:
You can consider writing a script that performs the find operation and adds printouts (or runs under the debugger) while the save operation is commented out. Once you've gained confidence, you can enable the save operation.
var changesLog = [];
var errorsLog = [];
events.find({timestamp: {$type: 2}}, function (err, events) {
    if (err) {
        debugger;
        throw err;
    } else {
        for (var i = 0; i < events.length; i++) {
            console.log('events ' + i + '/' + (events.length - 1));
            var currentEvent = events[i];
            currentEvent.timestamp = new ISODate(currentEvent.timestamp);
            var change = currentEvent._id;
            changesLog.push(change);
            // // ** Dry Run **
            // currentEvent.save(function (err) {
            //     if (err) {
            //         debugger;
            //         errorsLog.push(currentEvent._id + ", " + currentEvent.timestamp + ', ' + err);
            //         throw err;
            //     }
            // });
        }
        console.log('Done');
        console.log('Changes:');
        console.log(changesLog);
        console.log('Errors:');
        console.log(errorsLog);
        return;
    }
});
db.collection.find({"_manager": { $exists: true, $ne: null }}).forEach(
    function(doc) {
        doc['_managers'] = [doc._manager]; // String --> List
        delete doc['_manager'];            // Remove the "_manager" key-value pair
        printjson(doc);                    // Debug by printing the resulting doc
        //db.teams.save(doc);              // Save all the changes into the doc
    }
)
In my case the collection contains _manager and I would like to change it to a _managers list. I have tested it locally and it works as expected.
In recent versions of MongoDB (at least starting with 4.2), you can do this using a transaction.
const { MongoClient } = require('mongodb')

async function main({ dryRun }) {
    const client = new MongoClient('mongodb://127.0.0.1:27017', {
        maxPoolSize: 1
    })
    const pool = await client.connect()
    const db = pool.db('someDB')
    // Note: multi-document transactions require a replica set (or sharded cluster).
    const session = pool.startSession()
    session.startTransaction()
    try {
        const filter = { id: 'some-id' }
        const update = { $rename: { 'newTimestamp': 'timestamp' } }
        // This is the important bit
        const options = { session: session }
        await db.collection('someCollection').updateMany(
            filter,
            update,
            options // using session
        )
        const afterUpdate = await db.collection('someCollection')
            .find(
                filter,
                options // using session
            )
            .toArray()
        console.debug('updated documents', afterUpdate)
        if (dryRun) {
            // This will roll back any changes made within the session
            await session.abortTransaction()
        } else {
            await session.commitTransaction()
        }
    } finally {
        await session.endSession()
        await pool.close()
    }
}

const _ = main({ dryRun: true })