nextjs parse XLSX on API route from incoming request - axios

I have been trying to reduce my NextJS bundle size by moving my XLSX parsing to an API route. It uses the npm xlsx (sheetjs) package, and extracts JSON from a selected XLSX.
What I am doing in the frontend is
let res;
let formData = new FormData();
formData.append("file", e.target.files[0]);
try {
res = await axios.post("/api/importExcel", formData);
} catch (e) {
createCriticalError(
"Critical error during file reading from uploaded file!"
);
}
On the API route I am unable to to read the file using XLSX.read()
I believe NextJS uses body-parser on the incoming requests but I am unable to convert the incoming data to an array buffer or any readable format for XLSX.
Do you have any suggestions about how to approach this issue?
I tried multiple solutions, the most viable seemed this, but it still does not work
export default async function handler(req, res) {
console.log(req.body);
let arr;
let file = req.body;
let contentBuffer = await new Response(file).arrayBuffer();
try {
var data = new Uint8Array(contentBuffer);
var workbook = XLSX.read(data, { type: "array" });
var sheet = workbook.Sheets[workbook.SheetNames[0]];
arr = XLSX.utils.sheet_to_json(sheet);
} catch (e) {
console.error("Error while reading the excel file");
console.log({ ...e });
res.status(500).json({ err: e });
}
res.status(200).json(arr);
}

Since you're uploading a file, you should start by disabling the body parser to consume the body as a stream.
I would also recommend using a third-party library like formidable to handle and parse the form data. You'll then be able to read the file using XLSX.read() and convert it to JSON.
import XLSX from "xlsx";
import formidable from "formidable";
// Disable `bodyParser` to consume as stream
export const config = {
api: {
bodyParser: false
}
};
export default async function handler(req, res) {
const form = new formidable.IncomingForm();
try {
// Promisified `form.parse`
const jsonData = await new Promise(function (resolve, reject) {
form.parse(req, async (err, fields, files) => {
if (err) {
reject(err);
return;
}
try {
const workbook = XLSX.readFile(files.file.path);
const sheet = workbook.Sheets[workbook.SheetNames[0]];
const jsonSheet = XLSX.utils.sheet_to_json(sheet);
resolve(jsonSheet);
} catch (err) {
reject(err);
}
});
});
return res.status(200).json(jsonData);
} catch (err) {
console.error("Error while parsing the form", err);
return res.status(500).json({ error: err });
}
}

Related

How do I make 2 (or more) calls with Adobe PDF Services and skip using the file system (in between?)

It's fairly simple to make one call to Adobe PDF Services, get the result, and save it, for example:
// more stuff above
exportPdfOperation.execute(executionContext)
.then(result => result.saveAsFile(output))
But if I want to do two, or more, operations, do I need to keep saving the result to the file system and re-providing it (is that even a word ;) to the API?
So this tripped me up as well. In most demos, you'll see:
result => result.saveAsFile()
towards the end. However, the object passes to the completed promise, result, is a FileRef object that can then be used as the input to another call.
Here's a sample that takes an input Word doc and calls the API method to create a PDF. It then takes that and runs OCR on it. Both methods that wrap the API calls return FileRefs, so at the end I saveAsFile on it. (Note, this demo is using v1 of the SDK, it would work the same w/ v2.)
const PDFToolsSdk = require('#adobe/documentservices-pdftools-node-sdk');
const fs = require('fs');
//clean up previous
(async ()=> {
// hamlet.docx was too big for conversion
const input = './hamlet2.docx';
const output = './multi.pdf';
const creds = './pdftools-api-credentials.json';
if(fs.existsSync(output)) fs.unlinkSync(output);
let result = await createPDF(input, creds);
console.log('got a result');
result = await ocrPDF(result, creds);
console.log('got second result');
await result.saveAsFile(output);
})();
async function createPDF(source, creds) {
return new Promise((resolve, reject) => {
const credentials = PDFToolsSdk.Credentials
.serviceAccountCredentialsBuilder()
.fromFile(creds)
.build();
const executionContext = PDFToolsSdk.ExecutionContext.create(credentials),
createPdfOperation = PDFToolsSdk.CreatePDF.Operation.createNew();
// Set operation input from a source file
const input = PDFToolsSdk.FileRef.createFromLocalFile(source);
createPdfOperation.setInput(input);
let stream = new Stream.Writable();
stream.write = function() {
}
stream.end = function() {
console.log('end called');
resolve(stream);
}
// Execute the operation and Save the result to the specified location.
createPdfOperation.execute(executionContext)
.then(result => resolve(result))
.catch(err => {
if(err instanceof PDFToolsSdk.Error.ServiceApiError
|| err instanceof PDFToolsSdk.Error.ServiceUsageError) {
reject(err);
} else {
reject(err);
}
});
});
}
async function ocrPDF(source, creds) {
return new Promise((resolve, reject) => {
const credentials = PDFToolsSdk.Credentials
.serviceAccountCredentialsBuilder()
.fromFile(creds)
.build();
const executionContext = PDFToolsSdk.ExecutionContext.create(credentials),
ocrOperation = PDFToolsSdk.OCR.Operation.createNew();
// Set operation input from a source file.
//const input = PDFToolsSdk.FileRef.createFromStream(source);
ocrOperation.setInput(source);
let stream = new Stream.Writable();
stream.end = function() {
console.log('end called');
resolve(stream);
}
// Execute the operation and Save the result to the specified location.
ocrOperation.execute(executionContext)
.then(result => resolve(result))
.catch(err => reject(err));
});
}

How to save a file in a Firestore database from a multipart/formData request?

I have a firebase function :
//Functions and firestore requirements are here
exports.saveData = functions.https.onRequest(async (req, res) => {
console.log("Data received")
const busboy = new Busboy({headers: req.headers});
const fields = {}
const uploads = {}
//Push fields that are not file in fields
busboy.on('field', (fieldname, val) => {
console.log(`Processed field ${fieldname}: ${val}.`);
fields[fieldname] = val;
});
//Push files in uploads
busboy.on('file', (fieldname, file, filename) => {
console.log('File :', file);
const filepath = path.join(tmpdir, filename);
uploads[fieldname] = filepath;
});
busboy.on('finish', () => {
console.log(uploads)
console.log(fields)
db.collection("a_collection").add({
a: fields.a,
b: fields.b,
file: "Help ! From the client, I send an image. Which value do I need to save ?"
})
.then(function () {
res.send("Data is saved")
})
.catch(function (error) {
res.status(400).send(error)
console.error("Error :" + error)
})
});
busboy.end(req.rawBody);
})
I want to save image and data from multipart/formData requests in a Firestore database. How can I do this ? Do I need to save a base64 image or there is an other way to save files in google cloud firestore ?
My english is not perfect, Sorry :/
You need to save it as Base64 test so that you can upload it to Firestore.
So what you can do is read your file transform it to Base64 and then add it to your Firestore database.
To get the Base 64 you can use this
//Get Base64 of file
function getBase64(file){
var reader = new FileReader();
reader.onerror = function (error) {
console.log('Error: ', error);
};
reader.readAsDataURL(file);
reader.onload = function () {
let encoded = reader.result.split(',');
//you split this to get mimetype out of your base64
addForSale(Date.now().toString(10), { uFile: encoded[1]});
// I just used a timestamp as the ID
}
};
The other alternative is to upload the files to Cloud Storage and save in Firestore the location of the file in Cloud Storage.

Puppeteer and express can not load new data using REST API

I'm using puppeteer to scrape page that has contents that change periodically and use express to present data in rest api.
If I turn on headless chrome to see what is being shown in the browser, the new data is there, but the data is not showing up in get() and http://localhost:3005/api-weather. The normal browser only shows the original data.
const express = require('express');
const server = new express();
const cors = require('cors');
const morgan = require('morgan');
const puppeteer = require('puppeteer');
server.use(morgan('combined'));
server.use(
cors({
allowHeaders: ['sessionId', 'Content-Type'],
exposedHeaders: ['sessionId'],
origin: '*',
methods: 'GET, HEAD, PUT, PATCH, POST, DELETE',
preflightContinue: false
})
);
const WEATHER_URL = 'https://forecast.weather.gov/MapClick.php?lat=40.793588904953985&lon=-73.95738513173298';
const hazard_url2 = `file://C:/Users/xdevtran/Documents/vshome/wc_api/weather-forecast-nohazard.html`;
(async () => {
try {
const browser = await puppeteer.launch({ headless: true });
const page = await browser.newPage();
await page.setRequestInterception(true);
page.on("request", request => {
console.log(request.url());
request.continue();
});
await page.goto(hazard_url2, { timeout: 0, waitUntil: 'networkidle0' });
hazard = {
"HazardTitle": "stub",
"Hazardhref": "stub"
}
let forecast = await page.evaluate(() => {
try {
let forecasts = document.querySelectorAll("#detailed-forecast-body.panel-body")[0].children;
let weather = [];
for (var i = 0, element; element = forecasts[i]; i++) {
period = element.querySelector("div.forecast-label").textContent;
forecast = element.querySelector("div.forecast-text").textContent;
weather.push(
{
period,
forecast
}
)
}
return weather;
} catch (err) {
console.log('error in evaluate: ', err);
res.end();
}
}).catch(err => {
console.log('err.message :', err.message);
});
weather = forecast;
server.get('/api-weather', (req, res) => {
try {
res.end(JSON.stringify(weather, null, ' '));
console.log(weather);
} catch (err) {
console.log('failure: ', err);
res.sendStatus(500);
res.end();
return;
}
});
} catch (err) {
console.log('caught error :', err);
}
browser.close();
})();
server.listen(3005, () => {
console.log('http://localhost:3005/api-weather');
});
I've tried several solutions WaitUntil, WaitFor, .then and sleep but nothing seems to work.
I wonder if it has something to do with express get()? I'm using res.end() instead of res.send() is because the json looks better when I use res.end(). I don't really know the distinction.
I'm also open to using this reload solution. But I received errors and didn't use it.
I also tried waitForNavigation(), but I don't know how it works, either.
Maybe I'm using the wrong search term to find the solution. Could anyone point me in the right direction? Thank you.

Mongoose save() does not work when putting documents in collection and uploading files using multer-gridfs-storage

I am trying to build a music app and while working on the back end for the app (using express), I am facing this weird issue of documents not saving in mongo collections.
I made a post route to which user submits form data, which contains the song's mp3 file and the name of the song (it will have more data later on).
I am using multer to parse multipart form data.
I am able to save the mp3 file to mongoDB using multer-gridfs-storage. I want to save the song info such as name, artists etc in a different collection and here is the schema for the collection:
import mongoose from 'mongoose';
const Schema = mongoose.Schema;
const SongsInfo = new Schema({
name: {
type: String,
required: true,
},
});
const Song = mongoose.model('Song', SongsInfo);
export default Song;
index.js file:
import Grid from 'gridfs-stream';
import GridFsStorage from 'multer-gridfs-storage';
const app = express();
const conn = mongoose.createConnection(mongoURI);
let gfs;
conn.once('open', () => {
console.log('Connected to mongodb');
gfs = Grid(conn.db, mongoose.mongo);
gfs.collection('songFiles');
});
// storage engine
const storage = new GridFsStorage({
url: mongoURI,
file: (req, file) => new Promise((resolve, reject) => {
crypto.randomBytes(16, (err, buf) => {
if (err) {
return reject(err);
}
const filename = buf.toString('hex') +
path.extname(file.originalname);
let fileInfo;
fileInfo = {
filename,
bucketName: 'songFiles',
};
resolve(fileInfo);
});
}),
});
let upload;
middleWare(app);
app.post('/api/uploadSong', async (req, res) => {
upload = multer({ storage }).any();
upload(req, res, async (err) => {
console.log('in');
if (err) {
// console.log(err);
return res.end('Error uploading file.');
}
const { name } = req.body;
// push a Song into songs collection
const songInfo = new Song({
name,
});
const si = await songInfo.save(); // (*)
console.log(songInfo);
res.json({
songInfo: si,
file: req.file,
});
});
});
On line (*) the server just freezes until the request gets timed out.
No errors shown on console. Don't know what to do :(
I solved the issue finally!
So what i did was bring the models in index.js file and changed up some stuff here and there..
index.js
const app = express();
mongoose.connect(mongoURI); //(*)
const conn = mongoose.connection; // (*)
let gfs;
conn.once('open', () => {
console.log('Connected to mongodb');
gfs = Grid(conn.db, mongoose.mongo);
gfs.collection('songFiles');
});
// models
const Schema = mongoose.Schema; //(***)
const SongsInfo = new Schema({
name: {
type: String,
required: true,
},
});
const Song = mongoose.model('Song', SongsInfo);
// storage engine
const storage = new GridFsStorage({
url: mongoURI,
file: (req, file) => new Promise((resolve, reject) => {
crypto.randomBytes(16, (err, buf) => {
if (err) {
return reject(err);
}
const filename = buf.toString('hex') + path.extname(file.originalname);
let fileInfo;
if (file.mimetype === 'image/jpeg' || file.mimetype === 'image/png') {
fileInfo = {
filename,
bucketName: 'imageFiles',
};
} else {
fileInfo = {
filename,
bucketName: 'songFiles',
};
}
resolve(fileInfo);
});
}),
});
let upload;
middleWare(app);
app.post('/api/uploadSong', async (req, res) => {
upload = multer({ storage }).any();
upload(req, res, async (err) => {
console.log('in');
if (err) {
return res.end('Error uploading file.');
}
const { name } = req.body;
// push a Song into songs collection
const songInfo = new Song({
name,
});
songInfo.save((er, song) => {
if (!er) {
res.send('err');
} else {
console.log(err, song);
res.send('err');
}
});
});
});
At line (***) I used the same instance of mongoose that was initialized.. in the previous file I imported it again from the package...
Kia ora!
For those stumbling here three years on. I came across this issue as well. Abhishek Mehandiratta has the answer hidden in their code snippet.
The Fix
I went from instantiating mongoose with:
connect(connStr)
to doing:
const conn = createConnection(connStr)
This is the breaking change. So for an easy fix, change your code back to using connect(...).
I too, followed the documentation and Youtube tutorials to alter my code in such a way. It's an unfortunate misleading for developers who have not encountered a need to understand the difference.
I changed it back, and now it's working again (even with 'multer-gridfs-storage'). You can reference the connection with:
import {connection} from "mongoose";
connection.once('open', ...)
Why is this happening?
BenSower writes up the differences between connect and createConnection here. So from my basic understanding of BenSower's write up, you're referencing the wrong connection pool when you create your 'Song' schema, and thus, referencing the wrong pool when saving. I guess this results in a time out, have not looked further, but I'm sure a more in-depth answer exists somewhere.
import mongoose from 'mongoose';
const Song = mongoose.model('Song', SongsInfo);
As BenSower states in their answer "For me, the big disadvantage of creating multiple connections in the same module, is the fact that you can not directly access your models through mongoose.model, if they were defined "in" different connections". They're spot on with this one. Having finally overcome this obstacle, I shall have a look into createConnection on another project. For now, good ol' connect() will fix this issue, and do just fine :D

nodejs display image stored in gridFS to html

Hi I'm new to nodejs and gridFS
I'm trying to display images stored in gridFS to my html page
Currently, I am using this code.
gfs.exist(options, function(err, found){
if(err) return handleError(err);
if(found)
{
console.log("Found Image");
var fs_write_stream = fs.createWriteStream('public/images/'+req.user._id + '_photo' + '.jpg');
var readstream = gfs.createReadStream({
filename: req.user._id + '_photo'
});
readstream.pipe(fs_write_stream);
readstream.on('close', function(){
console.log('file has been written fully');
res.render('profile', {
user : req.user,
message: req.flash('info'),
user_photo_url: 'images/'+req.user._id+'_photo.jpg'
});
});
}
});
But my code need to download image from gridFS. If my server storage is not enough, it should be problematic
Is there any method to display gridFS images to html directly?
Add a route for resources in your images directory and pipe the gridfs readstream to the response directly like so
app.get('/images/:name', function(req, res) {
var readstream = gfs.createReadStream({
filename: req.param('name');
});
res.pipe(readstream);
})
In your html, all you need to do is specify the src url in your images correctly
var pi_id = fields.pic_id;
gfs.findOne({ _id: pi_id }, function (err, file) {
console.log(file);
if (err) return res.status(400).send(err);
if (!file) return res.status(404).send('');
res.set('Content-Type', file.contentType);
res.set('Content-Disposition', 'attachment; filename="' + file.filename + '"');
var readstream = gfs.createReadStream({
_id: file._id
});
readstream.on("error", function(err) {
console.log("Got error while processing stream " + err.message);
res.end();
});
readstream.pipe(res);
console.log(readstream.pipe(res))
});
Try the function like below,
function(req,res){
gfs.files.findOne({ filename: req.params.filename }, (err, file) => {
res.contentType(file.contentType);
// Check if image
if (file) {
// Read output to browser
const readstream = gfs.createReadStream(file.filename);
readstream.pipe(res);
} else {
console.log(err);
}
});
};