How can a Web Worker receive new data while it is still performing a computation?

I want to sort an array, using Web Workers. But this array might receive new values over time, while the worker is still performing the sort function.

So my question is, how can I "stop" the sorting computation on the worker after receiving the new item, so it can perform the sort on the array with that item, while still keeping the sorting that was already made?

Example:

const worker = new Worker('worker.js');
let list = [10, 1, 5, 2, 14, 3];

// hand the initial list to the worker
worker.postMessage({ list });
// every 100 ms, send a new number together with the current list
setInterval(() => {
  worker.postMessage({ num: SOME_RANDOM_NUM, list });
}, 100);

// adopt whatever list the worker sends back
worker.onmessage = ({ data }) => {
  list = data.list;
};

So let's say I've passed 50. The worker had made some progress in the sorting before that, and now I have something like this: [1, 2, 3, 10, 5, 14, 50], which means the sorting stopped at index 3. I then pass this new array back to the worker, so it can continue the sorting from position 3.

How can I accomplish that, since there is no way to pause/resume a web worker?


Solution 1:

Even though the Worker runs on a different thread from the one of your main page, and can thus run continuously without blocking the UI, it still runs on a single thread.

This means that until your sort algorithm has finished, the Worker will delay the execution of the message event handler; it is as blocked as would be the main thread.

Even if you made use of another Worker from inside this worker, the problem would be the same.

The only solution would be to use a kind of generator function as the sorter, and to yield it every now and then so that the events can get executed.

But doing this will drastically slow down your sorting algorithm.

To make it better, you could hook into each event-loop iteration with a MessageChannel object: you post a message on one port and receive it on the other port in the next event-loop iteration. If you then post again from that handler, you have your own hook into every event-loop iteration.

Ideally you would run a good-sized batch of steps in each of these event-loop iterations, but for the demo I'll advance our generator function (which I borrowed from this Q/A) by only one step each time.

// --- main thread: worker wiring ---
const worker = new Worker(getWorkerURL());
worker.onmessage = draw; // every reply from the worker gets rendered

// a click sends one extra value to the worker (shown as a red pixel)
onclick = () => {
  worker.postMessage(0x0000FF / 0xFFFFFF);
};

// ask the worker for its current list once per animation frame
function requestFrame() {
  worker.postMessage('gimme a frame');
  requestAnimationFrame(requestFrame);
}
requestFrame();

// --- main thread: drawing surface ---
const ctx = canvas.getContext('2d');
const img = ctx.createImageData(50, 50);
// 32-bit view over the ImageData buffer: one array slot per pixel
const data = new Uint32Array(img.data.buffer);
ctx.imageSmoothingEnabled = false;

// Renders the list received from the worker: one pixel per entry, then magnified.
function draw(evt) {
  const values = evt.data;
  // 0 becomes opaque black and 1 opaque white; the alpha byte is always 0xFF
  values.forEach((value, index) => {
    data[index] = 0xFF000000 + (value * 0xFFFFFF);
  });

  // start from an identity transform and an empty canvas
  const { width, height } = canvas;
  ctx.setTransform(1, 0, 0, 1, 0, 0);
  ctx.clearRect(0, 0, width, height);
  ctx.putImageData(img, 0, 0);

  // redraw the canvas onto itself at 5x so the pixels are visible
  ctx.scale(5, 5);
  ctx.drawImage(canvas, 0, 0);
}

// Builds a blob: URL from the inline <script type="worker-script"> element,
// so the worker code can live in the same document.
function getWorkerURL() {
  const { textContent } = document.querySelector('[type="worker-script"]');
  return URL.createObjectURL(new Blob([textContent]));
}
/* page background colour for the demo */
body{
  background: ivory;
}
<script type="worker-script">
// the data being sorted: 2500 random 0/1 values
const list = Array.from({ length: 2500 }, () => Number(Math.random() > 0.5));
// generator that performs the sort one swap at a time
let sorter = bubbleSort(list);
// true once the current sorter has run to completion
let done = false;
/* inner messaging channel */
const msg_channel = new MessageChannel();
// port2 receives what port1 posts; each delivery advances the sort by one step
msg_channel.port2.onmessage = () => {
  // a batch of a few thousand steps could be run here instead of a single one
  const step = sorter.next();
  done = step.done;
  // keep re-posting until the generator is exhausted
  if (!done) {
    msg_channel.port1.postMessage('');
  }
};
// kick off the first step
msg_channel.port1.postMessage("");

/* outer messaging channel (from main) */
self.onmessage = ({ data: message }) => {
  // the main thread is polling for the current state of the list
  if (message === "gimme a frame") {
    self.postMessage(list);
    return;
  }

  // anything else is a new value to sort in
  list.push(message);
  if (!done) {
    return; // a sorter is still running over this same array
  }
  // restart the sorter
  sorter = bubbleSort(list);
  msg_channel.port1.postMessage('');
};

// In-place bubble sort written as a generator: it yields `true` after every
// swap, so the caller decides when the next step of the sort runs.
function* bubbleSort(a) {
  let swapped = true;
  while (swapped) {
    swapped = false;
    // a.length is re-read on each iteration, so items appended between steps are included
    for (let i = 0; i + 1 < a.length; i++) {
      if (!(a[i] > a[i + 1])) {
        continue;
      }
      [a[i], a[i + 1]] = [a[i + 1], a[i]];
      swapped = true;
      yield swapped; // pause here
    }
  }
}
</script>
<pre> click to add red pixels</pre>
<canvas id="canvas" width="250" height="250"></canvas>

Note that the same can be achieved with an async function, which may be more practical in some cases:

// --- main thread: worker wiring ---
const worker = new Worker(getWorkerURL());
worker.onmessage = draw; // every reply from the worker gets rendered

// a click sends one extra value to the worker (shown as a red pixel)
onclick = () => {
  worker.postMessage(0x0000FF / 0xFFFFFF);
};

// ask the worker for its current list once per animation frame
function requestFrame() {
  worker.postMessage('gimme a frame');
  requestAnimationFrame(requestFrame);
}
requestFrame();

// --- main thread: drawing surface ---
const ctx = canvas.getContext('2d');
const img = ctx.createImageData(50, 50);
// 32-bit view over the ImageData buffer: one array slot per pixel
const data = new Uint32Array(img.data.buffer);
ctx.imageSmoothingEnabled = false;

// Renders the list received from the worker: one pixel per entry, then magnified.
function draw(evt) {
  const values = evt.data;
  // 0 becomes opaque black and 1 opaque white; the alpha byte is always 0xFF
  values.forEach((value, index) => {
    data[index] = 0xFF000000 + (value * 0xFFFFFF);
  });

  // start from an identity transform and an empty canvas
  const { width, height } = canvas;
  ctx.setTransform(1, 0, 0, 1, 0, 0);
  ctx.clearRect(0, 0, width, height);
  ctx.putImageData(img, 0, 0);

  // redraw the canvas onto itself at 5x so the pixels are visible
  ctx.scale(5, 5);
  ctx.drawImage(canvas, 0, 0);
}

// Builds a blob: URL from the inline <script type="worker-script"> element,
// so the worker code can live in the same document.
function getWorkerURL() {
  const { textContent } = document.querySelector('[type="worker-script"]');
  return URL.createObjectURL(new Blob([textContent]));
}
/* page background colour for the demo */
body{
  background: ivory;
}
<script type="worker-script">
// the data being sorted: 2500 random 0/1 values
const list = Array.from({ length: 2500 }, () => Number(Math.random() > 0.5));
// flag set by the sorter once it has run to completion
let done = false;


/* outer messaging channel (from main) */
self.onmessage = ({ data: message }) => {
  // the main thread is polling for the current state of the list
  if (message === "gimme a frame") {
    self.postMessage(list);
    return;
  }

  // anything else is a new value to sort in
  list.push(message);
  // restart the sorter
  if (done) {
    bubbleSort(list);
  }
};

/**
 * Bubble-sorts `a` in place, regularly pausing so that messages queued on the
 * worker can be handled between batches of comparisons.
 *
 * The shared `done` flag is cleared as soon as a run starts and set again when
 * the run finishes. Clearing it on entry matters: the message handler restarts
 * the sort only when `done` is true, so without the reset every value received
 * after the first completed sort would launch one more concurrent sorter over
 * the same array.
 *
 * @param {Array} a - the array to sort; it may grow while the sort is running.
 * @returns {Promise<void>} resolves once a full pass completes without a swap.
 */
async function bubbleSort(a) { // async is magic
  done = false; // a run is in progress from this point on
  let swapped;
  do {
    swapped = false;
    // a.length is re-read on every iteration, so values pushed during a pause are included
    for (let i = 0; i < a.length - 1; i++) {
      if (a[i] > a[i + 1]) {
        [a[i], a[i + 1]] = [a[i + 1], a[i]];
        swapped = true;
      }
      if (i % 50 === 0) { // by batches of 50?
        await waitNextTask(); // pause here
      }
    }
  } while (swapped);
  done = true;
}

// Returns a promise that resolves when a message posted on a private
// MessageChannel is delivered. The channel is created on first use and
// cached on the function itself.
function waitNextTask() {
  return new Promise((resolve) => {
    if (!waitNextTask.channel) {
      waitNextTask.channel = new MessageChannel();
    }
    const { port1, port2 } = waitNextTask.channel;
    // one-shot listener: each call waits for exactly one delivery
    port1.addEventListener("message", () => resolve(), { once: true });
    port2.postMessage("");
    port1.start();
  });
}

// start the first sort right away; later runs are started from self.onmessage
bubbleSort(list);
</script>
<pre> click to add red pixels</pre>
<canvas id="canvas" width="250" height="250"></canvas>

Solution 2:

There are two decent options.

Option 1: Worker.terminate()

The first is just to kill your existing web worker and start a new one. For that you can use Worker.terminate().

The terminate() method of the Worker interface immediately terminates the Worker. This does not offer the worker an opportunity to finish its operations; it is simply stopped at once.

The only downsides of this approach are:

  • You lose all worker state. If you had to copy a load of data into it for the request you have to do it all again.
  • It involves thread creation and destruction, which isn't as slow as most people think but if you terminate web workers a lot it might cause issues.

If neither of those are an issue it is probably the easiest option.

In my case I have lots of state. My worker is rendering part of an image, and when the user pans to a different area I want it to stop what it is doing and start rendering the new area. But the data needed to render the image is pretty huge.

In your case you have the state of your (presumably huge) list that you don't want to lose.

Option 2: Yielding

The second option is basically to do cooperative multitasking. You run your computation as normal, but every now and then you pause (yield) and say "should I stop?", like this (this is for some nonsense calculation, not sorting).

// id of the most recent request; older computations compare against it
let requestId = 0;

onmessage = ({ data }) => {
  requestId += 1;
  sortAndSendData(requestId, data);
};

// Stand-in computation (sums data, data-1, ... 1) that checks after every step
// whether a newer request has replaced it, and posts the total if it has not.
function sortAndSendData(thisRequestId, data) {
  let sum = 0;

  for (let remaining = data; remaining !== 0; remaining -= 1) {
    // Do a little bit of computation.
    sum += remaining;

    // Data was changed: cancel this run without posting anything.
    if (requestId !== thisRequestId) {
      return;
    }
  }

  postMessage(sum);
}

This won't work, though, because sortAndSendData() runs to completion and blocks the web worker's event loop. We need some way to yield just before the thisRequestId !== requestId check. Unfortunately JavaScript doesn't quite have a yield method. It does have async/await, so we might try this:

// id of the most recent request; older computations compare against it
let requestId = 0;

onmessage = (event) => {
  console.log("Got event", event);
  requestId += 1;
  sortAndSendData(requestId, event.data);
};

// Same stand-in computation, but awaiting an already-resolved promise
// between the work and the cancellation check.
async function sortAndSendData(thisRequestId, data) {
  let sum = 0;

  for (let remaining = data; remaining !== 0; remaining -= 1) {
    // Do a little bit of computation.
    sum += remaining;

    await Promise.resolve();

    // Still the current request? If not, drop this run.
    if (requestId !== thisRequestId) {
      console.log("Cancelled!");
      return;
    }
  }

  postMessage(sum);
}

Unfortunately it doesn't work. I think it's because async/await resumes through "microtasks", which are all executed before the next pending "macrotask" (our web worker message) gets a turn.

We need to force our await to become a macrotask, which you can do using setTimeout(0):

// id of the most recent request; older computations compare against it
let requestId = 0;

onmessage = (event) => {
  console.log("Got event", event);
  requestId += 1;
  sortAndSendData(requestId, event.data);
};

// Resolves from a timer callback rather than from a microtask.
function yieldToMacrotasks() {
  return new Promise((resume) => {
    setTimeout(resume);
  });
}

// Same stand-in computation, now pausing on a timer-based promise after each
// step so that a newer request can be noticed by the cancellation check.
async function sortAndSendData(thisRequestId, data) {
  let sum = 0;

  for (let remaining = data; remaining !== 0; remaining -= 1) {
    // Do a little bit of computation.
    sum += remaining;

    await yieldToMacrotasks();

    // Still the current request? If not, drop this run.
    if (requestId !== thisRequestId) {
      console.log("Cancelled!");
      return;
    }
  }

  postMessage(sum);
}

This works! However it is extremely slow. await yieldToMacrotasks() takes approximately 4 ms on my machine with Chrome! This is because browsers set a minimum timeout on setTimeout(0) of something like 1 or 4 ms (the actual minimum seems to be complicated).

Fortunately another user pointed me to a quicker way. Basically sending a message on another MessageChannel also yields to the event loop, but isn't subject to the minimum delay like setTimeout(0) is. This code works and each loop only takes ~0.04 ms which should be fine.

// handle for the computation currently in flight
let currentTask = { cancelled: false };

onmessage = (event) => {
  // flag the previous computation, then start a new one with its own handle
  currentTask.cancelled = true;
  const nextTask = { cancelled: false };
  currentTask = nextTask;
  performComputation(nextTask, event.data);
};

/**
 * Runs the stand-in computation (sums data, data-1, ... 1), yielding to the
 * event loop after every step so that a newer request can cancel it.
 *
 * Yielding is done by posting a message on a private MessageChannel and
 * waiting for it to arrive. Only one such message is in flight at a time,
 * because the next one is posted only after the previous one was received.
 *
 * The channel is closed when the function finishes or is cancelled; otherwise
 * every call would leave an open pair of ports with a live handler behind.
 *
 * @param {{cancelled: boolean}} task - handle whose `cancelled` flag is set by
 *   the caller when this computation has been superseded.
 * @param {number} data - starting value of the countdown.
 * @returns {Promise<void>} resolves after the total has been posted, or as
 *   soon as the task is seen to be cancelled (nothing is posted then).
 */
async function performComputation(task, data) {
  let total = 0;

  let promiseResolver;

  const channel = new MessageChannel();
  channel.port2.onmessage = () => {
    promiseResolver();
  };

  try {
    while (data !== 0) {
      // Do a little bit of computation.
      total += data;
      --data;

      // Yield to the event loop: the promise resolves when the message arrives.
      const promise = new Promise((resolve) => {
        promiseResolver = resolve;
      });
      channel.port1.postMessage(null);
      await promise;

      // Check if this task has been superseded by another one.
      if (task.cancelled) {
        return;
      }
    }
  } finally {
    // Release the ports on every exit path (completed or cancelled).
    channel.port1.close();
    channel.port2.close();
  }

  // Return the result.
  postMessage(total);
}

I'm not totally happy about it - it relies on postMessage() events being processed in FIFO order, which I doubt is guaranteed. I suspect you could rewrite the code to make it work even if that isn't true.