Chrome Extension: Get Page Variables in Content Script

Solution 1:

If you really need to, you can insert a <script> element into the page's DOM; the code inside your <script> element will be executed and that code will have access to JavaScript variables at the scope of the window. You can then communicate them back to the content script using data- attributes and firing custom events.

Sound awkward? Why yes, it is, and intentionally so for all the reasons in the documentation that serg has cited. But if you really, really need to do it, it can be done. See here and here for more info. And good luck!

Solution 2:

I created a little helper method, have fun :)

to retrieve the window's variables "lannister", "always", "pays", "his", "debts", you execute the following:

var windowVariables = retrieveWindowVariables(["lannister", "always", "pays", "his", "debts"]);
console.log(windowVariables.lannister);
console.log(windowVariables.always);

my code:

function retrieveWindowVariables(variables) {
    var ret = {};

    var scriptContent = "";
    for (var i = 0; i < variables.length; i++) {
        var currVariable = variables[i];
        scriptContent += "if (typeof " + currVariable + " !== 'undefined') $('body').attr('tmp_" + currVariable + "', " + currVariable + ");\n"
    }

    var script = document.createElement('script');
    script.id = 'tmpScript';
    script.appendChild(document.createTextNode(scriptContent));
    (document.body || document.head || document.documentElement).appendChild(script);

    for (var i = 0; i < variables.length; i++) {
        var currVariable = variables[i];
        ret[currVariable] = $("body").attr("tmp_" + currVariable);
        $("body").removeAttr("tmp_" + currVariable);
    }

    $("#tmpScript").remove();

    return ret;
}

please note that i used jQuery.. you can easily use the native js "removeAttribute" and "removeChild" instead.

Solution 3:

Using Liran's solution, I'm adding some fix for Objects, here's correct solution:

function retrieveWindowVariables(variables) {
    var ret = {};

    var scriptContent = "";
    for (var i = 0; i < variables.length; i++) {
        var currVariable = variables[i];
        scriptContent += "if (typeof " + currVariable + " !== 'undefined') $('body').attr('tmp_" + currVariable + "', JSON.stringify(" + currVariable + "));\n"
    }

    var script = document.createElement('script');
    script.id = 'tmpScript';
    script.appendChild(document.createTextNode(scriptContent));
    (document.body || document.head || document.documentElement).appendChild(script);

    for (var i = 0; i < variables.length; i++) {
        var currVariable = variables[i];
        ret[currVariable] = $.parseJSON($("body").attr("tmp_" + currVariable));
        $("body").removeAttr("tmp_" + currVariable);
    }

     $("#tmpScript").remove();

    return ret;
}

Solution 4:

Chrome's documentation gives you a good starting point: https://developer.chrome.com/extensions/content_scripts#host-page-communication

This method allows you to extract a global page variable to your content script. It also uses an idea to only accept incoming messages that you recognize given your handshake. You can also just use Math.random() for the handshake but I was having some fun.

Explanation

  1. This method creates a script tag
  2. It stringifies the function propagateVariable and passes the current handShake and targeted variable name into the string for preservation since the function will not have access to our content script scope.
  3. Then it injects that script tag to the page.
  4. We then create a listener in our content script waiting to hear back from the page to pass back the variable we're after.
  5. By now the injected script has hit the page.
  6. The injected code was wrapped in an IIFE so it runs itself pushing the data to the listener.
  7. Optional: The listener makes sure that it had the correct handshake and voila we can trust the source of the data (It's not actually secure, but it helps create an identifier in this case, that gives us some level of trust).

Round 1

v1.0

const globalToExtract = 'someVariableName';
const array = new Uint32Array(5);
const handShake = window.crypto.getRandomValues(array).toString();

function propagateVariable(handShake, variableName) {
  const message = { handShake };
  message[variableName] = window[variableName];
  window.postMessage(message, "*");
}

(function injectPropagator() {
  const script = `( ${propagateVariable.toString()} )('${handShake}', '${globalToExtract}');`
  const scriptTag = document.createElement('script');
  const scriptBody = document.createTextNode(script);
  
  scriptTag.id = 'chromeExtensionDataPropagator';
  scriptTag.appendChild(scriptBody);
  document.body.append(scriptTag);
})();

window.addEventListener("message", function({data}) {
  console.log("INCOMINGGGG!", data);
  // We only accept messages from ourselves
  if (data.handShake != handShake) return;

  console.log("Content script received: ", data);
}, false);

v1.1 With Promise!

function extractGlobal(variableName) {

  const array = new Uint32Array(5);
  const handShake = window.crypto.getRandomValues(array).toString();

  function propagateVariable(handShake, variableName) {
    const message = { handShake };
    message[variableName] = window[variableName];
    window.postMessage(message, "*");
  }

  (function injectPropagator() {
    const script = `( ${propagateVariable.toString()} )('${handShake}', '${variableName}');`
    const scriptTag = document.createElement('script');
    const scriptBody = document.createTextNode(script);

    scriptTag.id = 'chromeExtensionDataPropagator';
    scriptTag.appendChild(scriptBody);
    document.body.append(scriptTag);
  })();

  return new Promise(resolve => {
    window.addEventListener("message", function({data}) {
      // We only accept messages from ourselves
      if (data.handShake != handShake) return;
      resolve(data);
    }, false);
  });
}

extractGlobal('someVariableName').then(data => {
  // Do Work Here
});

Round 2 - Class & Promises

v2.0

I would recommend tossing the class into its own file and exporting it as a default if using es modules. Then it simply becomes:

ExtractPageVariable('someGlobalPageVariable').data.then(pageVar => {
  // Do work here 💪
});

class ExtractPageVariable {
  constructor(variableName) {
    this._variableName = variableName;
    this._handShake = this._generateHandshake();
    this._inject();
    this._data = this._listen();
  }

  get data() {
    return this._data;
  }

  // Private

  _generateHandshake() {
    const array = new Uint32Array(5);
    return window.crypto.getRandomValues(array).toString();
  }

  _inject() {
    function propagateVariable(handShake, variableName) {
      const message = { handShake };
      message[variableName] = window[variableName];
      window.postMessage(message, "*");
    }

    const script = `( ${propagateVariable.toString()} )('${this._handShake}', '${this._variableName}');`
    const scriptTag = document.createElement('script');
    const scriptBody = document.createTextNode(script);

    scriptTag.id = 'chromeExtensionDataPropagator';
    scriptTag.appendChild(scriptBody);
    document.body.append(scriptTag);
  }

  _listen() {
    return new Promise(resolve => {
      window.addEventListener("message", ({data}) => {
        // We only accept messages from ourselves
        if (data.handShake != this._handShake) return;
        resolve(data);
      }, false);
    })
  }
}

const windowData = new ExtractPageVariable('somePageVariable').data;
windowData.then(console.log);
windowData.then(data => {
   // Do work here
});

Solution 5:

As explained partially in other answers, the JS variables from the page are isolated from your Chrome extension content script. Normally, there's no way to access them.

But if you inject a JavaScript tag in the page, you will have access to whichever variables are defined there.

I use a utility function to inject my script in the page:

/**
 * inject - Inject some javascript in order to expose JS variables to our content JavaScript
 * @param {string} source - the JS source code to execute
 * Example: inject('(' + myFunction.toString() + ')()');
 */
function inject(source) {
  const j = document.createElement('script'),
    f = document.getElementsByTagName('script')[0];
  j.textContent = source;
  f.parentNode.insertBefore(j, f);
  f.parentNode.removeChild(j);
}

Then you can do:

function getJSvar(whichVar) {
   document.body.setAttribute('data-'+whichVar,whichVar);
}
inject('(' + getJSvar.toString() + ')("somePageVariable")');

var pageVar = document.body.getAttribute('data-somePageVariable');

Note that if the variable is a complex data type (object, array...), you will need to store the value as a JSON string in getJSvar(), and JSON.parse it back in your content script.