How do you get a list of the names of all files present in a directory in Node.js?

I'm trying to get a list of the names of all the files present in a directory using Node.js. I want output that is an array of filenames. How can I do this?

Solution 1:

You can use the fs.readdir or fs.readdirSync methods. fs is included in Node.js core, so there's no need to install anything.

fs.readdir

const testFolder = './tests/';
const fs = require('fs');

fs.readdir(testFolder, (err, files) => {
  files.forEach(file => {
    console.log(file);
  });
});

fs.readdirSync

const testFolder = './tests/';
const fs = require('fs');

fs.readdirSync(testFolder).forEach(file => {
  console.log(file);
});

The difference between the two methods, is that the first one is asynchronous, so you have to provide a callback function that will be executed when the read process ends.

The second is synchronous, it will return the file name array, but it will stop any further execution of your code until the read process ends.

Solution 2:

IMO the most convenient way to do such tasks is to use a glob tool. Here's a glob package for node.js. Install with

npm install glob

Then use wild card to match filenames (example taken from package's website)

var glob = require("glob")

// options is optional
glob("**/*.js", options, function (er, files) {
  // files is an array of filenames.
  // If the `nonull` option is set, and nothing
  // was found, then files is ["**/*.js"]
  // er is an error object or null.
})

If you are planning on using globby here is an example to look for any xml files that are under current folder

var globby = require('globby');

const paths = await globby("**/*.xml");

Solution 3:

The answer above does not perform a recursive search into the directory though. Here's what I did for a recursive search (using node-walk: npm install walk)

var walk    = require('walk');
var files   = [];

// Walker options
var walker  = walk.walk('./test', { followLinks: false });

walker.on('file', function(root, stat, next) {
    // Add this file to the list of files
    files.push(root + '/' + stat.name);
    next();
});

walker.on('end', function() {
    console.log(files);
});

Solution 4:

Get files in all subdirs

const fs=require('fs');

function getFiles (dir, files_){
    files_ = files_ || [];
    var files = fs.readdirSync(dir);
    for (var i in files){
        var name = dir + '/' + files[i];
        if (fs.statSync(name).isDirectory()){
            getFiles(name, files_);
        } else {
            files_.push(name);
        }
    }
    return files_;
}

console.log(getFiles('path/to/dir'))

Solution 5:

Here's a simple solution using only the native fs and path modules:

// sync version
function walkSync(currentDirPath, callback) {
    var fs = require('fs'),
        path = require('path');
    fs.readdirSync(currentDirPath).forEach(function (name) {
        var filePath = path.join(currentDirPath, name);
        var stat = fs.statSync(filePath);
        if (stat.isFile()) {
            callback(filePath, stat);
        } else if (stat.isDirectory()) {
            walkSync(filePath, callback);
        }
    });
}

or async version (uses fs.readdir instead):

// async version with basic error handling
function walk(currentDirPath, callback) {
    var fs = require('fs'),
        path = require('path');
    fs.readdir(currentDirPath, function (err, files) {
        if (err) {
            throw new Error(err);
        }
        files.forEach(function (name) {
            var filePath = path.join(currentDirPath, name);
            var stat = fs.statSync(filePath);
            if (stat.isFile()) {
                callback(filePath, stat);
            } else if (stat.isDirectory()) {
                walk(filePath, callback);
            }
        });
    });
}

Then you just call (for sync version):

walkSync('path/to/root/dir', function(filePath, stat) {
    // do something with "filePath"...
});

or async version:

walk('path/to/root/dir', function(filePath, stat) {
    // do something with "filePath"...
});

The difference is in how node blocks while performing the IO. Given that the API above is the same, you could just use the async version to ensure maximum performance.

However there is one advantage to using the synchronous version. It is easier to execute some code as soon as the walk is done, as in the next statement after the walk. With the async version, you would need some extra way of knowing when you are done. Perhaps creating a map of all paths first, then enumerating them. For simple build/util scripts (vs high performance web servers) you could use the sync version without causing any damage.