stream s3 to dynamodb with fast-csv : not all data inserted - amazon-s3

When a csv file is uploaded on my s3 bucket, my lambda will be triggered to insert my data into DynamoDB.
I need a stream because the file is too large to be downloaded as full object.
/**
 * Persists one chunk of parsed CSV rows (at most 25, the BatchWriteItem
 * request limit) to DynamoDB in a single batch call.
 *
 * @param {AWS.DynamoDB} clientDynamoDB - DynamoDB service client.
 * @param {Array<Object>} itemsToProcess - rows to convert into put requests.
 * @returns {Promise<void>} resolves once the batch call has completed.
 */
const batchWrite = async (clientDynamoDB, itemsToProcess) => {
    const putRequests = itemsToProcess.map((itm) => toPutRequest(itm));
    const params = { RequestItems: { [TABLE_DYNAMO]: putRequests } };
    await clientDynamoDB.batchWriteItem(params).promise();
};
/**
 * Streams parsed CSV rows into DynamoDB in chunks of 25.
 *
 * Fixes over the previous version:
 *  - pauses/resumes the csv parser stream (the stream the "data" events come
 *    from), not the raw S3 stream, so buffered rows cannot keep flowing while
 *    a batch is in flight;
 *  - propagates batchWrite failures via reject() instead of swallowing them
 *    with .finally(), which made failed batches look like successes;
 *  - skips the final write when there is no leftover partial chunk.
 *
 * @param {stream.Readable} stream - raw CSV byte stream (e.g. from S3).
 * @param {AWS.DynamoDB} clientDynamoDB - DynamoDB service client.
 * @returns {Promise<string>} resolves with "OK" once every row is written.
 */
function runStreamPromiseAsync(stream, clientDynamoDB) {
    return new Promise((resolve, reject) => {
        const sizeChunk = 25;
        let itemsToProcess = [];
        const csvStream = stream.pipe(fastCsv.parse({ headers: Object.keys(schemaGeData), trim: true }));
        csvStream
            .on("data", (row) => {
                itemsToProcess.push(row);
                if (itemsToProcess.length === sizeChunk) {
                    // Hold the parser while the batch is in flight.
                    csvStream.pause();
                    const batch = itemsToProcess;
                    itemsToProcess = [];
                    batchWrite(clientDynamoDB, batch)
                        .then(() => csvStream.resume())
                        .catch(reject);
                }
            })
            .on("error", (err) => {
                console.log(err);
                reject("Error");
            })
            .on("end", () => {
                console.log("end");
                // Flush the final partial chunk, if any, then settle.
                const tail = itemsToProcess.length !== 0
                    ? batchWrite(clientDynamoDB, itemsToProcess)
                    : Promise.resolve();
                tail.then(() => resolve("OK"), reject);
            });
    });
}
/**
 * Lambda entry point: triggered by an S3 upload event, streams the uploaded
 * CSV object into DynamoDB without buffering the whole file in memory.
 *
 * @param {Object} event - S3 event notification (Records[0].s3 is used).
 * @param {Object} context - Lambda context.
 * @param {Function} callback - Lambda callback (unused; the handler is async).
 */
module.exports.main = async (event, context, callback) => {
    context.callbackWaitsForEmptyEventLoop = false;
    const AWS = require('aws-sdk');
    // Fix: `https` was used below without ever being required, which made the
    // handler throw ReferenceError before the stream was even created.
    const https = require('https');
    const s3 = new AWS.S3();
    const object = event.Records[0].s3;
    const bucket = object.bucket.name;
    const file = object.object.key;
    // Keep-alive agent: reuses connections across the many BatchWriteItem
    // calls issued while streaming.
    const agent = new https.Agent({
        keepAlive: true
    });
    const client = new AWS.DynamoDB({
        httpOptions: {
            agent
        }
    });
    try {
        // Get the CSV data as a stream (the file is too large to buffer).
        const stream = s3
            .getObject({
                Bucket: bucket,
                Key: file
            })
            .createReadStream()
            .on('error', (e) => {
                console.log(e);
            });
        await runStreamPromiseAsync(stream, client);
    } catch (e) {
        // NOTE(review): errors are logged and swallowed, so Lambda reports
        // success and will not retry failed files — confirm this is intended.
        console.log(e);
    }
};
When my file is 1000 lines everything is inserted but when I have 5000 lines, my function insert only around 3000 lines and this number is random... Sometimes more sometimes less..
So I'd like to understand what I am missing here.
I also read this article but to be honest even if you pause the second stream, the first one is still running.. So if someone have any ideas on how to do this, it would be greatly appreciated !
Thanks

I found out why it was not fully processed: the callback of batchWriteItem can return unprocessed items. So I changed the batchWrite function, and also runStreamPromiseAsync a little, because I might not have all the items from itemsToProcess processed.
Anyway here is the full code :
/**
 * Writes a chunk of rows to DynamoDB and keeps retrying whatever the service
 * reports back in UnprocessedItems until everything has been accepted.
 *
 * Fixes over the previous version:
 *  - batchWriteItem was invoked with a node-style callback, so the value
 *    returned to the caller was an AWS.Request, not a promise — the caller's
 *    Promise.all over those values resolved immediately without waiting for
 *    any write to finish;
 *  - the callback checked `data` before `err` and built promises it never
 *    chained, so retries of unprocessed items could not be observed at all.
 *
 * @param {AWS.DynamoDB} client - DynamoDB service client.
 * @param {Array<Object>} itemsToProcess - rows to convert into put requests.
 * @returns {Promise<void>} resolves once every item (including retries of
 *   UnprocessedItems) has been written; rejects on the first AWS error.
 */
const batchWrite = async (client, itemsToProcess) => {
    const ri = {};
    ri[TABLE_DYNAMO] = itemsToProcess.map((itm) => toPutRequest(itm));
    let params = { RequestItems: ri };
    // BatchWriteItem may leave some items unprocessed (e.g. when throttled);
    // loop until the service reports an empty UnprocessedItems map.
    for (;;) {
        const data = await client.batchWriteItem(params).promise();
        const unprocessed = data && data.UnprocessedItems;
        if (!unprocessed || Object.keys(unprocessed).length === 0) {
            return;
        }
        params = { RequestItems: unprocessed };
    }
};
/**
 * Streams parsed CSV rows into DynamoDB, batching them in chunks of 25.
 *
 * Fix: the previous version did
 *     resolve(Promise.all(arrayPromise).catch(e => reject(e)))
 * which locks the outer promise onto the .catch-chained promise; when a batch
 * failed, the catch handler swallowed the rejection and the outer promise
 * RESOLVED with undefined instead of rejecting, so failures were invisible
 * to the caller.
 *
 * @param {stream.Readable} stream - raw CSV byte stream (e.g. from S3).
 * @param {AWS.DynamoDB} clientDynamoDB - DynamoDB service client.
 * @returns {Promise<Array>} resolves with the batch results once every chunk
 *   has been written; rejects on parse or write failure.
 */
function runStreamPromiseAsync(stream, clientDynamoDB) {
    return new Promise((resolve, reject) => {
        const sizeChunk = 25;
        let itemsToProcess = [];
        let arrayPromise = [];
        stream
            .pipe(fastCsv.parse({headers: Object.keys(schemaGeData), trim: true}))
            .on("error", (err) => {
                console.log(err);
                reject("Error");
            })
            .on('data', data => {
                itemsToProcess.push(data);
                if(itemsToProcess.length === sizeChunk){
                    arrayPromise.push(batchWrite(clientDynamoDB, itemsToProcess));
                    itemsToProcess = [];
                }
            })
            .on('end', () => {
                // Flush the final partial chunk, if any.
                if(itemsToProcess.length !== 0){
                    arrayPromise.push(batchWrite(clientDynamoDB, itemsToProcess));
                }
                // Settle the outer promise with the combined result: reject on
                // the first batch failure instead of swallowing it.
                Promise.all(arrayPromise).then(resolve, reject);
            });
    });
}

Related

vue method for loop wait for function complete

In this Vue component, I have a method containing a for loop that calls another method. The second method makes a request to the app server. I need the first method to wait for the second to complete before continuing the for loop. I've tried several async/await options, but I don't understand how to implement it.
methods: {
selectFiles(files) {
this.progressInfos = [];
this.selectedFiles = files;
},
uploadFiles() {
this.message = "";
//var result = 0;
for (let i = 0; i < this.selectedFiles.length; i++) {
console.log(i)
//result = await this.upload(i, this.selectedFiles[i]);
this.upload(i, this.selectedFiles[i]);
}
},
upload(idx, file) {
this.progressInfos[idx] = { percentage: 0, fileName: file.name };
//console.log("FinDocuNum:" + financialDocument.finDocId)
FinancialDocumentDataService.upload(1, file, (event) => {
this.progressInfos[idx].percentage = Math.round(100 * event.loaded / event.total);
}).then((response) => {
let prevMessage = this.message ? this.message + "\n" : "";
this.message = prevMessage + response.status;
return 1;
}).catch(() => {
this.progressInfos[idx].percentage = 0;
this.message = "Could not upload the file:" + file.name;
return 0;
});
}
}
The upload function must be async and return a promise like this:
async upload(file) {
return new Promise((resolve, reject) => {
axios({url: url, data: file, method: 'POST'})
.then(resp => {
resolve(resp)
})
.catch(err => {
reject(err)
})
})
},

Jest integration test Express REST API with Mongoose

Everybody, I'm new to unit/integration testing, and I'm having trouble testing one of my API routes, which involves file-system operations and Mongoose model method calls. I need to be able to mock a Mongoose model method as well as the router's post method. Let me share my router's post method.
documents.js
const { User } = require('../models/user');
const { Document } = require('../models/document');
const isValidObjectId = require('./../helpers/isValidObjectId');
const createError = require('./../helpers/createError');
const path = require('path');
const fs = require('fs');
const auth = require('./../middlewares/auth');
const uploadFile = require('./../middlewares/uploadFile');
const express = require('express');
const router = express.Router();
.
.
.
/**
 * POST /mine — stores an uploaded document for the authenticated user, unless
 * the user's disk quota is exceeded (in which case the just-uploaded temp
 * file is removed again).
 *
 * Fixes:
 *  - the async handler had no try/catch, so a rejected User.findById produced
 *    an unhandled rejection and a hanging request;
 *  - document.save() / user.save() had no .catch, with the same effect.
 */
router.post('/mine', [auth, uploadFile], async (req, res) => {
    try {
        const user = await User.findById(req.user._id);
        user.leftDiskSpace(function(err, leftSpace) {
            if(err) {
                return res.status(400).send(createError(err.message, 400));
            }
            if(leftSpace < 0) {
                // Quota exceeded: delete the file the upload middleware
                // already wrote, then answer 403.
                fs.access(req.file.path, (accessErr) => {
                    if(accessErr) {
                        res.status(403).send(createError('Your plan\'s disk space is exceeded.', 403));
                    } else {
                        fs.unlink(req.file.path, (unlinkErr) => {
                            if(unlinkErr) res.status(500).send('Silinmek istenen doküman diskten silinemedi.');
                            else res.status(403).send(createError('Your plan\'s disk space is exceeded.', 403));
                        });
                    }
                });
            } else {
                let document = new Document({
                    filename: req.file.filename,
                    path: `/uploads/${req.user.username}/${req.file.filename}`,
                    size: req.file.size
                });
                document.save()
                    .then((savedDocument) => {
                        user.documents.push(savedDocument._id);
                        return user.save().then(() => res.send(savedDocument));
                    })
                    .catch((saveErr) => res.status(500).send(createError(saveErr.message, 500)));
            }
        });
    } catch (e) {
        res.status(500).send(createError(e.message, 500));
    }
});
.
.
.
module.exports = router;
documents.test.js
const request = require('supertest');
const { Document } = require('../../../models/document');
const { User } = require('../../../models/user');
const mongoose = require('mongoose');
const fs = require('fs');
const path = require('path');
const config = require('config');
let server;
describe('/api/documents', () => {
beforeEach(() => { server = require('../../../bin/www'); });
afterEach(async () => {
    // Remove any files the upload tests created on disk. The folder may not
    // exist when no upload ran; access() then throws and rm() is skipped.
    const pathToTestFolder = path.join(process.cwd(), config.get('diskStorage.destination'), 'user');
    try {
        await fs.promises.access(pathToTestFolder);
        await fs.promises.rm(pathToTestFolder, { recursive: true });
    } catch (err) {
        // best-effort cleanup: ignore a missing folder
    }
    await User.deleteMany({});
    await Document.deleteMany({});
    server.close();
});
.
.
.
describe('POST /mine', () => {
let user;
let token;
let file;
// Issues the multipart POST under test with the current token and file.
const exec = () =>
    request(server)
        .post('/api/documents/mine')
        .set('x-auth-token', token)
        .attach('document', file);
beforeEach(async () => {
    // Persist a fresh user and grab a valid auth token for every test.
    user = await new User({
        username: 'user',
        password: '1234'
    }).save();
    // Logged only for debugging the quota calculation.
    user.leftDiskSpace(function(err, size) { console.log(size); });
    token = user.generateAuthToken();
    file = path.join(process.cwd(), 'tests', 'integration', 'files', 'test.json');
});
.
.
.
it('should return 400 if an error occurs during calculation of authorized user\'s left disk space', async () => {
    // NOTE(review): mocking the router AFTER `server` was already required
    // likely has no effect on the app instance exercised by exec() — the mock
    // must be installed before the server module loads. Confirm setup order.
    jest.mock('../../../routes/documents');
    let documentsRouter = require('../../../routes/documents');
    let mockReq = {};
    let mockRes = {}
    let mockPostRouter = jest.fn();
    mockPostRouter.mockImplementation((path, callback) => {
        // Fix: the original compared the string literal 'path' against
        // '/mine', which is always false; compare the actual route path.
        if (path === '/mine') callback(mockReq, mockRes);
    });
    documentsRouter.post = mockPostRouter;
    let error = new Error('Something went wrong...');
    const res = await exec();
    console.log(res.body);
    expect(res.status).toBe(400);
    expect(res.body.error).toHaveProperty('message', 'Something went wrong...');
});
.
.
.
});
});
What I want to do is this: I need to be able to call a mocked user.leftDiskSpace(function(err, leftSpace)) user model method inside the router.post('/mine', ...) route handler. I need to be able to reach the if and else branches via the callback function of user.leftDiskSpace(). How can I do that?
Thanks in advance.

not playing audio from buffer stored in the database

I've spent two days on this and couldn't find a solution, but it seems like I'm getting closer. The goal is to record audio in the browser, store it in the database, and retrieve it whenever I need it.
I encoded the audio to base64 and sent it to the server stored as binary in the mongodb, created a get route in which i find the audio by id and send the buffer via res.send() along with the content type set to audio/webm (default mime type)
But the thing is, I'm getting a blank video; it seems like the browser doesn't know how to decode it or something. There might be something wrong with the content type.
/**
 * Records microphone audio in the browser and ships it to the server as a
 * base64 data URL.
 *
 * Fixes:
 *  - the MediaRecorder option key was misspelled `mimType`, so the option
 *    was silently ignored; the correct key is `mimeType`;
 *  - ';base64' is not part of a valid MIME type for MediaRecorder — the
 *    base64 conversion happens later via FileReader.readAsDataURL;
 *  - dropped the unused alias `const m = mediaRecorder;`.
 */
navigator.mediaDevices.getUserMedia({ audio: true, video: false })
    .then((micStream) => {
        const mediaRecorder = new MediaRecorder(micStream, { mimeType: 'audio/webm' });
        let chunk = [];
        startBtn.addEventListener('click', () => {
            mediaRecorder.start();
            console.log('started recording..');
        });
        endBtn.addEventListener('click', () => {
            mediaRecorder.stop();
            console.log('just stopped recording');
        });
        //-----When data is available, buffer it
        mediaRecorder.ondataavailable = (e) => {
            chunk.push(e.data);
        };
        //-----When recording stops, encode the chunks and upload them
        mediaRecorder.onstop = () => {
            const blob = new Blob(chunk, { 'type': 'audio/webm' });
            chunk = [];
            const reader = new FileReader();
            reader.readAsDataURL(blob);
            reader.onloadend = async () => {
                const buffer = reader.result;
                const bodyObj = {
                    voice: buffer
                };
                await fetch('http://localhost:3000/upload-spoken-bio', {
                    method: 'post',
                    body: JSON.stringify(bodyObj),
                    headers: { 'Content-Type': 'application/json' }
                });
            };
        };
    })
and this is the server side
//----------Upload
/**
 * Stores the posted base64 voice recording.
 *
 * Fix: the route never sent a response, so every upload request hung until
 * the client timed out; it now answers 201 on success and 500 on failure.
 */
spokenBio.post('/upload-spoken-bio', async (req, res) => {
    const buffer = req.body
    try {
        const newBuffer = new SpokenCon(buffer)
        await newBuffer.save()
        res.sendStatus(201)
    }
    catch (err) {
        console.log(err.message);
        res.sendStatus(500)
    }
})
//----------Retrieve
/**
 * Streams a stored recording back as audio/webm.
 *
 * Fixes: a missing document now yields 404 instead of throwing on
 * `field.voice` (findById resolves null for unknown ids), and errors now
 * produce a 500 response instead of leaving the request hanging.
 */
spokenBio.get('/get-spoken-bio/:id', async (req, res) => {
    const id = req.params.id
    try {
        const field = await SpokenCon.findById(id)
        if (!field) {
            return res.sendStatus(404)
        }
        res.set({ 'Content-Type': 'audio/webm' })
        res.send(field.voice)
    }
    catch (err) {
        console.log(err.message);
        res.sendStatus(500)
    }
})

How to test FileReader wrapped inside promise?

Current code:
/**
 * Reads a File/Blob and resolves with its contents as a base64 data URL.
 *
 * @param {Blob} file - blob to read.
 * @returns {Promise<string>} resolves with reader.result; rejects with the
 *   FileReader error event.
 */
export const readFilePromise = (file) =>
  new Promise((resolve, reject) => {
    const reader = new FileReader();
    reader.onload = () => {
      resolve(reader.result);
    };
    reader.onerror = (error) => {
      reject(error);
    };
    reader.readAsDataURL(file);
  });
I am able to test the onload method
// Happy-path test: reading a Blob resolves with the expected base64 data URL.
it('readFilePromise method', async () => {
  // Hex-escaped so the binary fixture survives being stored in source.
  const fileContentsEncodedInHex = [
    '\x45\x6e\x63\x6f\x64\x65\x49\x6e\x48\x65\x78\x42\x65\x63\x61\x75\x73\x65\x42\x69\x6e\x61\x72\x79\x46\x69\x6c\x65\x73\x43\x6f\x6e\x74\x61\x69\x6e\x55\x6e\x70\x72\x69\x6e\x74\x61\x62\x6c\x65\x43\x68\x61\x72\x61\x63\x74\x65\x72\x73'
  ];
  const expectedOutput =
    'data:;base64,RW5jb2RlSW5IZXhCZWNhdXNlQmluYXJ5RmlsZXNDb250YWluVW5wcmludGFibGVDaGFyYWN0ZXJz';
  const output = await readFilePromise(new Blob(fileContentsEncodedInHex));
  expect(output).toEqual(expectedOutput);
});
I wonder how do I test the onerror?
If you had something like below because of this
reader.onerror = () => {
reject(reader.error);
};
Then the below should work.
Just know beforehand error message for passing empty file or nothing. I just wanted to create a scenario
describe('Error Case', () => {
it('should return error message for empty file', () => {
const result = readFilePromise();
const n = () => {};
return result.then(n, (errorMessage) => {
expect('your error message here').to.equal(errorMessage);
});
});

Image upload using react-admin

I am new to react-admin. I am using react-admin to upload a file. I have followed the steps mentioned in the tutorial below.
But after I submit the request...I see http trace as follow. I see blob link instead of Base64 image payload.
{
"pictures": {
"rawFile": {
"preview": "blob:http://127.0.0.1:3000/fedcd180-cdc4-44df-b8c9-5c7196788dc6"
},
"src": "blob:http://127.0.0.1:3000/fedcd180-cdc4-44df-b8c9-5c7196788dc6",
"title": "Android_robot.png"
}
}
Can someone please advice how to get base64 image payload instead of link?
Check to see if you have this handler; most likely you did not change the resource name posts to yours:
const addUploadCapabilities = requestHandler => (type, resource, params) => {
if (type === 'UPDATE' && resource === 'posts') {
Create your custom dataProvider to convert picture to base64
import restServerProvider from 'ra-data-json-server';
const servicesHost = 'http://localhost:8080/api';
const dataProvider = restServerProvider(servicesHost);
/**
 * Data provider that converts the uploaded picture to base64 before
 * delegating creation to the ra-data-json-server provider.
 *
 * Fix: `if (!myFile.rawFile instanceof File)` parses as
 * `(!myFile.rawFile) instanceof File`, which is ALWAYS false (a boolean is
 * never a File), so the non-file guard could never fire. The check now
 * negates the whole instanceof expression.
 */
const myDataProfider = {
    ...dataProvider,
    create: (resource, params) => {
        if (resource !== 'your-route' || !params.data.pictures) {
            // fallback to the default implementation
            return dataProvider.create(resource, params);
        }
        const myFile = params.data.pictures;
        if (!(myFile.rawFile instanceof File)) {
            return Promise.reject('Error: Not a file...'); // Didn't test this...
        }
        // NOTE(review): the transformed picture is stored under a new `myFile`
        // key while the original `pictures` entry is kept untouched — confirm
        // the server expects this payload shape.
        return Promise.resolve( convertFileToBase64(myFile) )
            .then( (picture64) => ({
                src: picture64,
                title: `${myFile.title}`
            }))
            .then( transformedMyFile => dataProvider.create(resource, {
                ...params,
                data: {
                    ...params.data,
                    myFile: transformedMyFile
                }
            }));
    }
};
// Wraps FileReader in a promise and resolves with the file's base64 data URL
// (rejects with the FileReader error event on failure).
const convertFileToBase64 = (file) =>
    new Promise((resolve, reject) => {
        const reader = new FileReader();
        reader.onload = () => resolve(reader.result);
        reader.onerror = reject;
        reader.readAsDataURL(file.rawFile);
    });
export default myDataProfider;
And get image data at your Server API
exports.create = (req, res) => {
if(req.body.myFile){
var file = req.body.myFile;
var fs = require('fs');
var data = file.src.replace(/^data:image\/\w+;base64,/, "");
var buf = Buffer.from(data, 'base64');
fs.writeFile(`upload/${file.title}`, buf, err => {
if (err) throw err;
console.log('Saved!');
});
}};