Limiting asynchronous calls in Node.js
I've got a Node.js app that gets a list of local files and uploads them to a server. This list could contain thousands of files.
// Naive version: starts one upload per file right away, with no cap on how
// many are in flight at the same time.
files.forEach(function (file) {
  upload_file(file);
});
If I execute this with thousands of files, upload_file will get called thousands of times all at once, and most likely die (or at least struggle). In the synchronous world, we'd create a thread pool and limit it to a certain number of threads. Is there a simple way to limit how many asynchronous calls get executed at once?
Solution 1:
As usual, I recommend Caolan McMahon's async module.
Make your upload_file
function take a callback as its second parameter:
var async = require("async");
/**
 * Queue worker for a single upload.
 *
 * @param {*} file - The task handed over by the queue (one entry of the list).
 * @param {Function} callback - Invoked with no arguments once this file is done.
 */
function upload_file(file, callback) {
  // Stand-in for the real work: read `file` and send it to the server here.
  // Report completion so the next pending file can be started.
  callback();
}
// Worker queue: at most ten uploads run at the same time.
var queue = async.queue(upload_file, 10);
// Register the "everything finished" handler. Since async v3, `drain` is a
// method that takes the handler; the older `queue.drain = function() {...}`
// assignment (async v2 style) no longer registers it.
queue.drain(function() {
  console.log("All files are uploaded");
});
// Queue your files for upload (push accepts a single task or an array of tasks)
queue.push(files);
// The limit can be changed while the queue is running
queue.concurrency = 20; // Increase to twenty simultaneous uploads
Solution 2:
The answer above, re: async on NPM is the best answer, but if you'd like to learn more about control flow:
You should look into control flow patterns. There's a wonderful discussion on control flow patterns in Chapter 7 of Mixu's Node Book. Namely, I'd look at the example in 7.2.3: Limited parallel - an asynchronous, parallel, concurrency limited for loop.
I've adapted his example:
/**
 * Reads and uploads a single file (placeholder implementation).
 *
 * @param {*} file - The entry taken from the pending list.
 * @param {Function} callback - Must be invoked exactly once when the upload
 *   has finished, so the caller can free the slot and start the next file.
 */
function doUpload(file, callback) {
  // perform file read & upload here, then call callback() when it is done...
}
// Files still waiting to be uploaded; `[...]` is a placeholder for the real list.
var files = [...];
var limit = 10; // concurrent read / upload limit
var running = 0; // number of running async file operations
/**
 * Starts pending uploads until `limit` of them are in flight or `files` is
 * empty. Every finished upload frees its slot and, if files remain, calls
 * uploader() again to fill it.
 */
function uploader() {
  while (running < limit && files.length > 0) {
    var file = files.shift();
    // Claim the slot before starting the upload: if doUpload() completes
    // synchronously, its callback must not decrement `running` ahead of the
    // increment, or the re-entrant uploader() call could exceed the limit.
    running++;
    doUpload(file, function() {
      running--;
      if (files.length > 0) {
        uploader();
      }
    });
  }
}
// Start the first batch; further uploads are started from the completion callbacks.
uploader();
Solution 3:
You should try queueing. I assume that a callback is fired when upload_file()
finishes. Something like this should do the trick (untested):
/**
 * Uploads every entry of `files` via upload_file(), keeping at most
 * `maxSimultaneousUploads` uploads in flight at once.
 *
 * @param {Array} files - Entries to pass to upload_file(), in order.
 * @param {number} maxSimultaneousUploads - Upper bound on concurrent uploads.
 * @param {Function} callback - Called once, with no arguments, after the last
 *   upload has reported completion (immediately when `files` is empty).
 */
function upload_files(files, maxSimultaneousUploads, callback) {
  var runningUploads = 0,
      startedUploads = 0,
      finishedUploads = 0;

  // Nothing to upload: report completion right away. Without this guard no
  // upload would ever finish and the callback would never be called.
  if (files.length === 0) {
    callback();
    return;
  }

  // Completion handler handed to every upload_file() call.
  function next() {
    runningUploads--;
    finishedUploads++;

    if (finishedUploads === files.length) {
      callback();
    } else {
      // Make sure that we are running at the maximum capacity.
      queue();
    }
  }

  function queue() {
    // Run as many uploads as possible while not exceeding the given limit.
    while (startedUploads < files.length && runningUploads < maxSimultaneousUploads) {
      // Count the upload before starting it so a synchronous completion
      // cannot push the counter below zero.
      runningUploads++;
      upload_file(files[startedUploads++], next);
    }
  }

  // Start the upload!
  queue();
}
Solution 4:
The other answers seem to be outdated. This can be solved easily using parallelLimit from async. Below is how to use it. I haven't tested it.
// One task per file; async passes each task the callback it must call when done.
var tasks = files.map(function(f) {
  return function(callback) {
    upload_file(f, callback);
  };
});
// Run the tasks with at most ten uploads in flight at once. parallelLimit is
// a function of the async module, so it is called through `async`.
async.parallelLimit(tasks, 10, function(err) {
  // Called once all tasks have finished, or as soon as one reports an error.
  if (err) {
    // Surface the failure instead of silently dropping it.
    console.error(err);
  }
});