FileReader API on big files
My FileReader API code has been working fine until one day I got a 280MB txt file from one of my clients. The page just crashes in Chrome, and in Firefox nothing happens.
// create a new FileReader object
var fileReader = new FileReader();
fileReader.onload = function(e)
{
  // e.target.result holds the entire file contents as text;
  // do sanity checks here etc...
  $timeout(function()
  {
    // get the first line
    var firstLine = e.target.result.slice(0, e.target.result.indexOf("\n"));
  });
};
// read the file as text
fileReader.readAsText($files[i]);
What I am trying to do above is find the first line break so that I can get the column length of the file. Should I not read it as text? How can I get the column length of the file without crashing the page on big files?
Your application is failing for big files because you're reading the full file into memory before processing it. This can be solved by streaming the file (reading it in small chunks), so you only need to hold a part of the file in memory at a time.
A File object is also an instance of a Blob, which offers the .slice method to create a smaller view of the file.
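For instance, a minimal sketch (assuming file is the File you got from your input) that reads only the first kilobyte instead of the whole thing:

// file.slice returns a new Blob that references only the
// requested byte range; no data is read from disk yet.
var firstKilobyte = file.slice(0, 1024);
var reader = new FileReader();
reader.onload = function() {
  console.log(reader.result); // at most 1024 bytes, read as text
};
reader.readAsText(firstKilobyte);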
Here is an example that assumes that the input is ASCII (demo: http://jsfiddle.net/mw99v8d4/).
function findColumnLength(file, callback) {
  // Read 1 KB at a time, because we expect the first line to be small.
  var CHUNK_SIZE = 1024;
  var offset = 0;
  var fr = new FileReader();
  fr.onload = function() {
    var view = new Uint8Array(fr.result);
    for (var i = 0; i < view.length; ++i) {
      if (view[i] === 10 || view[i] === 13) {
        // \n = 10 and \r = 13
        // column length = offset + position of \r or \n
        callback(offset + i);
        return;
      }
    }
    // \r or \n not found, continue seeking.
    offset += CHUNK_SIZE;
    seek();
  };
  fr.onerror = function() {
    // Cannot read the file... Do something, e.g. assume column size = 0.
    callback(0);
  };
  seek();

  function seek() {
    if (offset >= file.size) {
      // No \r or \n found. The column size is equal to the full file size.
      callback(file.size);
      return;
    }
    var slice = file.slice(offset, offset + CHUNK_SIZE);
    fr.readAsArrayBuffer(slice);
  }
}
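A minimal usage sketch, assuming a plain <input type="file"> on the page (the selector here is illustrative):

document.querySelector('input[type="file"]').addEventListener('change', function(e) {
  findColumnLength(e.target.files[0], function(length) {
    console.log('The first line is ' + length + ' bytes long');
  });
});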
The previous snippet counts the number of bytes before a line break. Counting the number of characters in text consisting of multibyte characters is slightly more difficult, because the last byte in a chunk could be part of a multibyte character.
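If your input is UTF-8, one way to handle that (a sketch, not part of the demo above; the function name is made up) is TextDecoder with { stream: true }, which buffers incomplete multibyte sequences across chunk boundaries. Note that .length below counts UTF-16 code units, which is what JavaScript string indexing uses anyway:

function findFirstLineCharCount(file, callback) {
  var CHUNK_SIZE = 1024;
  var offset = 0;
  var charCount = 0;
  // With { stream: true }, the decoder holds back bytes that form
  // an incomplete multibyte sequence until the next chunk arrives.
  var decoder = new TextDecoder('utf-8');
  var fr = new FileReader();
  fr.onload = function() {
    var text = decoder.decode(fr.result, { stream: true });
    var lineBreak = text.search(/[\r\n]/);
    if (lineBreak !== -1) {
      callback(charCount + lineBreak);
      return;
    }
    charCount += text.length;
    offset += CHUNK_SIZE;
    seek();
  };
  fr.onerror = function() {
    callback(0);
  };
  seek();

  function seek() {
    if (offset >= file.size) {
      callback(charCount);
      return;
    }
    fr.readAsArrayBuffer(file.slice(offset, offset + CHUNK_SIZE));
  }
}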
There is an awesome library called Papa Parse that does this gracefully! It can really handle big files, and you can also use a web worker.
Just try out the demos that they provide: https://www.papaparse.com/demo
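For example, a minimal sketch (assuming Papa Parse is already loaded on the page) that parses just the first row to get the column count:

Papa.parse(file, {
  preview: 1, // stop after the first row, so the 280MB file is barely touched
  // worker: true, // optionally move parsing off the main thread
  complete: function(results) {
    console.log('Columns: ' + results.data[0].length);
  }
});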