Node.js: Count the number of lines in a file
A solution without using wc:
// Count the newline bytes (0x0A) in the file named by the first command-line
// argument and print the total, like `wc -l`.
const NEWLINE = 10; // byte value of '\n'
let count = 0;
require('fs').createReadStream(process.argv[2])
  .on('data', function (chunk) {
    // No encoding is set on the stream, so `chunk` is a Buffer and indexing
    // it yields raw byte values.
    for (let i = 0; i < chunk.length; ++i) {
      if (chunk[i] === NEWLINE) count++;
    }
  })
  .on('end', function () {
    console.log(count);
  })
  .on('error', function (err) {
    // Without a listener, a stream 'error' (e.g. file not found) is thrown as
    // an uncaught exception; report it and signal failure instead.
    console.error(err.message);
    process.exitCode = 1;
  });
It's slower, but not by as much as you might expect: 0.6 s for a 140 MB+ file, including Node.js loading and startup time.
>time node countlines.js video.mp4
619643
real 0m0.614s
user 0m0.489s
sys 0m0.132s
>time wc -l video.mp4
619643 video.mp4
real 0m0.133s
user 0m0.108s
sys 0m0.024s
>wc -c video.mp4
144681406 video.mp4
We can use indexOf to let the VM find the newlines:
/**
 * Counts the newline bytes (value 10, '\n') in a file by streaming it.
 *
 * @param {string} filePath - Path of the file to read.
 * @returns {Promise<number>} Resolves with the number of newline bytes once
 *   the stream ends; rejects with the stream's error if reading fails.
 */
function countFileLines(filePath) {
  const NEWLINE = 10;
  return new Promise((resolve, reject) => {
    let total = 0;
    const stream = fs.createReadStream(filePath);
    stream.on("error", reject);
    stream.on("data", (chunk) => {
      // Jump from one newline to the next; indexOf returns -1 when none remain.
      for (
        let pos = chunk.indexOf(NEWLINE);
        pos !== -1;
        pos = chunk.indexOf(NEWLINE, pos + 1)
      ) {
        total += 1;
      }
    });
    stream.on("end", () => resolve(total));
  });
}
This solution finds the position of the first newline using .indexOf. It increments lineCount, then finds the next position. The second parameter to .indexOf tells it where to start looking for newlines. This way we jump over large chunks of the buffer. The while loop will run once for every newline, plus one.
We are letting the Node runtime do the searching for us, which is implemented at a lower level and should be faster.
On my system this is about twice as fast as running a for loop over the buffer length on a large file (111 MB).
As the comments suggest, you could do this using wc:
// Shell out to `wc` and print whatever it writes to stdout.
const exec = require('child_process').exec;
exec('wc /path/to/file', function (error, results) {
  if (error) {
    // The command failed (e.g. the file is missing or `wc` could not be run);
    // report the failure rather than printing possibly empty output.
    console.error(error.message);
    return;
  }
  console.log(results);
});
Since io.js 1.5.0 there is a Buffer#indexOf() method. Here it is compared against Andrey Sidorov's answer:
ubuntu@server:~$ wc logs
7342500 27548750 427155000 logs
ubuntu@server:~$ time wc -l logs
7342500 logs
real 0m0.180s
user 0m0.088s
sys 0m0.084s
ubuntu@server:~$ nvm use node
Now using node v0.12.1
ubuntu@server:~$ time node countlines.js logs
7342500
real 0m2.559s
user 0m2.200s
sys 0m0.340s
ubuntu@server:~$ nvm use iojs
Now using node iojs-v1.6.2
ubuntu@server:~$ time iojs countlines2.js logs
7342500
real 0m1.363s
user 0m0.920s
sys 0m0.424s
ubuntu@server:~$ cat countlines.js
// countlines.js: counts bytes equal to 10 ('\n') in the file given as the
// first command-line argument, using a plain loop over every chunk.
var i;
var count = 0;
require('fs').createReadStream(process.argv[2])
.on('data', function(chunk) {
// Compare each element of the chunk against 10, the byte value of '\n'.
for (i=0; i < chunk.length; ++i)
if (chunk[i] == 10) count++;
})
.on('end', function() {
// Print the accumulated total when the stream emits 'end'.
console.log(count);
});
ubuntu@server:~$ cat countlines2.js
// countlines2.js: counts bytes equal to 10 ('\n') in the file given as the
// first command-line argument, locating them with chunk.indexOf.
var i; // NOTE: not referenced anywhere in this script.
var count = 0;
require('fs').createReadStream(process.argv[2])
.on('data', function(chunk) {
var index = -1;
// Each indexOf call starts one position past the previous match; the loop
// stops when indexOf returns -1 (no further match in this chunk).
while((index = chunk.indexOf(10, index + 1)) > -1) count++
})
.on('end', function() {
// Print the accumulated total when the stream emits 'end'.
console.log(count);
});
ubuntu@server:~$
If you use Node 8 or above, you can use this async/await pattern:
const util = require('util');
const execFile = util.promisify(require('child_process').execFile);

/**
 * Counts the lines of a file by running the system `wc -l` on it.
 *
 * The path is passed to `wc` as a separate argument without going through a
 * shell, so spaces or shell metacharacters in it are never interpreted.
 *
 * @param {{ fileLocation: string }} options - `fileLocation` is the path of
 *   the file to count.
 * @returns {Promise<number>} Resolves with the line count reported by `wc`.
 *   Rejects if `wc` cannot be run or exits with a non-zero status (for
 *   example when the file does not exist).
 */
async function fileLineCount({ fileLocation }) {
  // `--` ends option parsing so a path starting with `-` is treated as a file.
  const { stdout } = await execFile('wc', ['-l', '--', fileLocation]);
  // `wc -l <file>` prints "<count> <file>"; parseInt reads the leading count.
  return Number.parseInt(stdout, 10);
}
// Usage
/** Example caller: awaits the line count of a sample file. */
async function someFunction() {
  const lineCount = await fileLineCount({ fileLocation: 'some/file.json' });
}