Create a zip file on S3 from files on S3 using Lambda Node
I need to create a Zip file that consists of a selection of files (videos and images) located in my s3 bucket.
The problem at the moment using my code below is that I quickly hit the memory limit on Lambda.
async.eachLimit(files, 10, function(file, next) {
var params = {
Bucket: bucket, // bucket name
Key: file.key
};
s3.getObject(params, function(err, data) {
if (err) {
console.log('file', file.key);
console.log('get image files err',err, err.stack); // an error occurred
} else {
console.log('file', file.key);
zip.file(file.key, data.Body);
next();
}
});
},
function(err) {
if (err) {
console.log('err', err);
} else {
console.log('zip', zip);
content = zip.generateNodeStream({
type: 'nodebuffer',
streamFiles:true
});
var params = {
Bucket: bucket, // name of dest bucket
Key: 'zipped/images.zip',
Body: content
};
s3.upload(params, function(err, data) {
if (err) {
console.log('upload zip to s3 err',err, err.stack); // an error occurred
} else {
console.log(data); // successful response
}
});
}
});
Is this possible using Lambda, or should I look at a different approach?
Is it possible to write to a compressed zip file on the fly, therefore eliminating the memory issue somewhat, or do I need to have the files collected before compression?
Any help would be much appreciated.
Solution 1:
Okay, I got to do this today and it works. Direct Buffer to Stream, no disk involved. So memory or disk limitation won't be an issue here:
'use strict';
const AWS = require("aws-sdk");
AWS.config.update( { region: "eu-west-1" } );
const s3 = new AWS.S3( { apiVersion: '2006-03-01'} );
const _archiver = require('archiver');
//This returns us a stream.. consider it as a real pipe sending fluid to S3 bucket.. Don't forget it
const streamTo = (_bucket, _key) => {
var stream = require('stream');
var _pass = new stream.PassThrough();
s3.upload( { Bucket: _bucket, Key: _key, Body: _pass }, (_err, _data) => { /*...Handle Errors Here*/ } );
return _pass;
};
exports.handler = async (_req, _ctx, _cb) => {
var _keys = ['list of your file keys in s3'];
var _list = await Promise.all(_keys.map(_key => new Promise((_resolve, _reject) => {
s3.getObject({Bucket:'bucket-name', Key:_key})
.then(_data => _resolve( { data: _data.Body, name: `${_key.split('/').pop()}` } ));
}
))).catch(_err => { throw new Error(_err) } );
await new Promise((_resolve, _reject) => {
var _myStream = streamTo('bucket-name', 'fileName.zip'); //Now we instantiate that pipe...
var _archive = _archiver('zip');
_archive.on('error', err => { throw new Error(err); } );
//Your promise gets resolved when the fluid stops running... so that's when you get to close and resolve
_myStream.on('close', _resolve);
_myStream.on('end', _resolve);
_myStream.on('error', _reject);
_archive.pipe(_myStream); //Pass that pipe to _archive so it can push the fluid straigh down to S3 bucket
_list.forEach(_itm => _archive.append(_itm.data, { name: _itm.name } ) ); //And then we start adding files to it
_archive.finalize(); //Tell is, that's all we want to add. Then when it finishes, the promise will resolve in one of those events up there
}).catch(_err => { throw new Error(_err) } );
_cb(null, { } ); //Handle response back to server
};
Solution 2:
I formated the code according to @iocoker.
main entry
// index.js
'use strict';
const S3Zip = require('./s3-zip')
const params = {
files: [
{
fileName: '1.jpg',
key: 'key1.JPG'
},
{
fileName: '2.jpg',
key: 'key2.JPG'
}
],
zippedFileKey: 'zipped-file-key.zip'
}
exports.handler = async event => {
const s3Zip = new S3Zip(params);
await s3Zip.process();
return {
statusCode: 200,
body: JSON.stringify(
{
message: 'Zip file successfully!'
}
)
};
}
Zip file util
// s3-zip.js
'use strict';
const fs = require('fs');
const AWS = require("aws-sdk");
const Archiver = require('archiver');
const Stream = require('stream');
const https = require('https');
const sslAgent = new https.Agent({
KeepAlive: true,
rejectUnauthorized: true
});
sslAgent.setMaxListeners(0);
AWS.config.update({
httpOptions: {
agent: sslAgent,
},
region: 'us-east-1'
});
module.exports = class S3Zip {
constructor(params, bucketName = 'default-bucket') {
this.params = params;
this.BucketName = bucketName;
}
async process() {
const { params, BucketName } = this;
const s3 = new AWS.S3({ apiVersion: '2006-03-01', params: { Bucket: BucketName } });
// create readstreams for all the output files and store them
const createReadStream = fs.createReadStream;
const s3FileDwnldStreams = params.files.map(item => {
const stream = s3.getObject({ Key: item.key }).createReadStream();
return {
stream,
fileName: item.fileName
}
});
const streamPassThrough = new Stream.PassThrough();
// Create a zip archive using streamPassThrough style for the linking request in s3bucket
const uploadParams = {
ACL: 'private',
Body: streamPassThrough,
ContentType: 'application/zip',
Key: params.zippedFileKey
};
const s3Upload = s3.upload(uploadParams, (err, data) => {
if (err) {
console.error('upload err', err)
} else {
console.log('upload data', data);
}
});
s3Upload.on('httpUploadProgress', progress => {
// console.log(progress); // { loaded: 4915, total: 192915, part: 1, key: 'foo.jpg' }
});
// create the archiver
const archive = Archiver('zip', {
zlib: { level: 0 }
});
archive.on('error', (error) => {
throw new Error(`${error.name} ${error.code} ${error.message} ${error.path} ${error.stack}`);
});
// connect the archiver to upload streamPassThrough and pipe all the download streams to it
await new Promise((resolve, reject) => {
console.log("Starting upload of the output Files Zip Archive");
streamPassThrough.on('close', resolve());
streamPassThrough.on('end', resolve());
streamPassThrough.on('error', reject());
archive.pipe(streamPassThrough);
s3FileDwnldStreams.forEach((s3FileDwnldStream) => {
archive.append(s3FileDwnldStream.stream, { name: s3FileDwnldStream.fileName })
});
archive.finalize();
}).catch((error) => {
throw new Error(`${error.code} ${error.message} ${error.data}`);
});
// Finally wait for the uploader to finish
await s3Upload.promise();
}
}
Solution 3:
The other solutions are great for not so many files (less than ~60). If they handle more files, they just quit into nothing with no errors. This is because they open too many streams.
This solution is inspired by https://gist.github.com/amiantos/16bacc9ed742c91151fcf1a41012445e
It is a working solution, which works well even with many files (+300) and returns a presigned URL to the zip which contains the files.
Main Lambda:
const AWS = require('aws-sdk');
const S3 = new AWS.S3({
apiVersion: '2006-03-01',
signatureVersion: 'v4',
httpOptions: {
timeout: 300000 // 5min Should Match Lambda function timeout
}
});
const archiver = require('archiver');
import stream from 'stream';
const UPLOAD_BUCKET_NAME = "my-s3-bucket";
const URL_EXPIRE_TIME = 5*60;
export async function getZipSignedUrl(event) {
const prefix = `uploads/id123123/}`; //replace this with your S3 prefix
let files = ["12314123.png", "56787567.png"] //replace this with your files
if (files.length == 0) {
console.log("No files to zip");
return result(404, "No pictures to download");
}
console.log("Files to zip: ", files);
try {
files = files.map(file => {
return {
fileName: file,
key: prefix + '/' + file,
type: "file"
};
});
const destinationKey = prefix + '/' + 'uploads.zip'
console.log("files: ", files);
console.log("destinationKey: ", destinationKey);
await streamToZipInS3(files, destinationKey);
const presignedUrl = await getSignedUrl(UPLOAD_BUCKET_NAME, destinationKey, URL_EXPIRE_TIME, "uploads.zip");
console.log("presignedUrl: ", presignedUrl);
if (!presignedUrl) {
return result(500, null);
}
return result(200, presignedUrl);
}
catch(error) {
console.error(`Error: ${error}`);
return result(500, null);
}
}
Helper functions:
export function result(code, message) {
return {
statusCode: code,
body: JSON.stringify(
{
message: message
}
)
}
}
export async function streamToZipInS3(files, destinationKey) {
await new Promise(async (resolve, reject) => {
var zipStream = streamTo(UPLOAD_BUCKET_NAME, destinationKey, resolve);
zipStream.on("error", reject);
var archive = archiver("zip");
archive.on("error", err => {
throw new Error(err);
});
archive.pipe(zipStream);
for (const file of files) {
if (file["type"] == "file") {
archive.append(getStream(UPLOAD_BUCKET_NAME, file["key"]), {
name: file["fileName"]
});
}
}
archive.finalize();
})
.catch(err => {
console.log(err);
throw new Error(err);
});
}
function streamTo(bucket, key, resolve) {
var passthrough = new stream.PassThrough();
S3.upload(
{
Bucket: bucket,
Key: key,
Body: passthrough,
ContentType: "application/zip",
ServerSideEncryption: "AES256"
},
(err, data) => {
if (err) {
console.error('Error while uploading zip')
throw new Error(err);
reject(err)
return
}
console.log('Zip uploaded')
resolve()
}
).on("httpUploadProgress", progress => {
console.log(progress)
});
return passthrough;
}
function getStream(bucket, key) {
let streamCreated = false;
const passThroughStream = new stream.PassThrough();
passThroughStream.on("newListener", event => {
if (!streamCreated && event == "data") {
const s3Stream = S3
.getObject({ Bucket: bucket, Key: key })
.createReadStream();
s3Stream
.on("error", err => passThroughStream.emit("error", err))
.pipe(passThroughStream);
streamCreated = true;
}
});
return passThroughStream;
}
export async function getSignedUrl(bucket: string, key: string, expires: number, downloadFilename?: string): Promise<string> {
const exists = await objectExists(bucket, key);
if (!exists) {
console.info(`Object ${bucket}/${key} does not exists`);
return null
}
let params = {
Bucket: bucket,
Key: key,
Expires: expires,
};
if (downloadFilename) {
params['ResponseContentDisposition'] = `inline; filename="${encodeURIComponent(downloadFilename)}"`;
}
try {
const url = s3.getSignedUrl('getObject', params);
return url;
} catch (err) {
console.error(`Unable to get URL for ${bucket}/${key}`, err);
return null;
}
};