Limiting asynchronous calls in Node.js

I've got a Node.js app that gets a list of local files and uploads them to a server. This list could contain thousands of files.

// Start an upload for every entry right away; nothing here waits for an
// earlier upload to finish before the next one is started.
var i = 0;
while (i < files.length) {
   upload_file(files[i]);
   i++;
}

If I execute this with thousands of files, upload_file will get called thousands of times all at once, and most likely die (or at least struggle). In the synchronous world, we'd create a thread pool and limit it to a certain number of threads. Is there a simple way to limit how many asynchronous calls get executed at once?


Solution 1:

As usual, I recommend Caolan McMahon's async module.

Make your upload_file function take a callback as its second parameter:

var async = require("async");

/**
 * Placeholder upload worker: handles one file, then signals completion.
 *
 * @param {*} file - The item to upload (not used by this placeholder body).
 * @param {Function} callback - Invoked with no arguments once the work is done.
 */
function upload_file(file, callback) {
    // Do funky stuff with file
    // Tell the caller this file is finished so it can move on to the next one.
    callback();
}

// Build a work queue that hands each pushed item to upload_file and keeps
// at most ten uploads running at the same time.
var queue = async.queue(upload_file, 10); // Run ten simultaneous uploads

// Register the "everything finished" handler.
// Since async v3, `drain` is a method that takes the handler; the older
// `queue.drain = function() {...}` assignment (async v2 and earlier) no longer
// registers anything, so the message below would never be printed.
queue.drain(function() {
    console.log("All files are uploaded");
});

// Queue your files for upload (push accepts a single task or an array of tasks)
queue.push(files);

// The limit can be adjusted while the queue is running.
queue.concurrency = 20; // Increase to twenty simultaneous uploads

Solution 2:

The answer above (re: async on npm) is the best answer, but if you'd like to learn more about control flow:


You should look into control flow patterns. There's a wonderful discussion on control flow patterns in Chapter 7 of Mixu's Node Book. Namely, I'd look at the example in 7.2.3: Limited parallel - an asynchronous, parallel, concurrency limited for loop.

I've adapted his example:

/**
 * Placeholder for reading and uploading a single file.
 *
 * The caller passes the file and a completion callback; the callback MUST be
 * invoked when the upload is over, otherwise the caller's slot is never freed
 * and no further uploads are started.
 *
 * @param {*} file - The file to read and upload.
 * @param {Function} [callback] - Invoked with no arguments when the upload is done.
 */
function doUpload(file, callback) {
    // perform file read & upload here...

    // Report completion on a later event-loop turn, the way a real upload
    // would. Completing synchronously would make the caller recurse once per
    // file, which can exhaust the stack for lists of thousands of files.
    if (typeof callback === "function") {
        setImmediate(callback);
    }
}

var files   = [];       // fill with the files to upload; entries are removed as uploads start
var limit   = 10;       // concurrent read / upload limit
var running = 0;        // number of running async file operations

/**
 * Starts uploads until either `limit` of them are in flight or `files` is
 * empty. Each finished upload frees its slot and, if files remain, calls
 * uploader() again to refill the free slots.
 *
 * Relies on doUpload(file, callback) invoking `callback` when it is done.
 */
function uploader() {
    while (running < limit && files.length > 0) {
        var file = files.shift();
        // Claim the slot before starting, so a doUpload that completes
        // synchronously cannot push the counter below zero.
        running++;
        doUpload(file, function() {
            running--;
            if (files.length > 0) {
                uploader();
            }
        });
    }
}

uploader();

Solution 3:

You should try queueing. I assume that a callback is fired when upload_file() finishes. Something like this should do the trick (untested):

/**
 * Uploads every entry of `files` through upload_file, keeping at most
 * `maxSimultaneousUploads` uploads in flight, and calls `callback` once
 * all of them have finished.
 *
 * Note: a limit below 1 never starts an upload, so `callback` would not fire
 * for a non-empty list.
 *
 * @param {Array} files - Items handed one at a time to upload_file.
 * @param {number} maxSimultaneousUploads - Upper bound on concurrent uploads.
 * @param {Function} callback - Invoked with no arguments after the last upload
 *     finishes, or immediately when `files` is empty.
 */
function upload_files(files, maxSimultaneousUploads, callback) {
    var runningUploads = 0;
    var startedUploads = 0;
    var finishedUploads = 0;

    // Completion handler passed to every upload_file call.
    function next() {
        runningUploads--;
        finishedUploads++;

        if (finishedUploads === files.length) {
            callback();
        } else {
            // Make sure that we are running at the maximum capacity.
            queue();
        }
    }

    function queue() {
        // Run as many uploads as possible while not exceeding the given limit.
        while (startedUploads < files.length && runningUploads < maxSimultaneousUploads) {
            runningUploads++;
            upload_file(files[startedUploads++], next);
        }
    }

    // With nothing to upload, next() would never run and the caller would
    // wait forever; report completion right away instead.
    if (files.length === 0) {
        callback();
        return;
    }

    // Start the upload!
    queue();
}

Solution 4:

The other answers seem to be outdated. This can be solved easily using parallelLimit from async. Below is how to use it. I haven't tested it.

// Turn every file into a task function of the shape async expects:
// a function that receives a completion callback.
var tasks = files.map(function(f) {
    return function(callback) {
        upload_file(f, callback);
    };
});

// Run the tasks with at most ten in flight at any time. parallelLimit is a
// function of the async module (`var async = require("async")`), not a global.
async.parallelLimit(tasks, 10, function(err) {
    if (err) {
        // A task reported an error through its callback; surface it
        // instead of dropping it silently.
        console.error("Upload failed:", err);
        return;
    }
    console.log("All files are uploaded");
});