Persist Google Tag Manager's DataLayer Across Pages

Use this Custom HTML tag script to persist GTM's dataLayer (and data model) across pages until a given timeout is reached.

UPDATE 4 June 2020: Instead of copying the Custom HTML code from the article, please load it from the GitHub Gist.

Four years ago, I wrote an article on how to persist GTM’s dataLayer from page to page. Unfortunately, the solution was a bit clumsy, requiring you to give specific commands for the interactions, which made it really unwieldy in the long run. Google Tag Manager still doesn’t offer us a native way to persist the dataLayer array or its internal data model from one page to the other, so I thought it was about time I revisit this idea.

This time, there won’t be an API to interact with. Instead, the solution will simply store the contents of the dataLayer array AND the internal data model from page to page, until the user hasn’t interacted with GTM’s dataLayer for a given amount of time.

Setting it up

To set it up, you need to create a Custom HTML tag, in which you’ll copy the following code. If you like, you can also copy the code from this gist.

UPDATE 4 June 2020: Please copy the code from the gist link above rather than from the article below. The gist is kept up-to-date, and has stability fixes that help resolve some issues users have been having with the solution.

The Custom HTML tag code

<script>
  (function() {
    // How long the stored dataLayer history is kept without any new dataLayer activity before
    // it is purged. The value is in milliseconds; the default is 30 minutes.
    var timeout = 30*60*1000;
    
    // Name of the dataLayer array. Change this only if your GTM container snippet declares
    // another name for the dataLayer array.
    var dataLayerName = 'dataLayer';
    
    // Don't change anything below.
    // Internal state.
    // True until the first intercepted push on this page has back-filled the history.
    var initialLoad = true;
    // The push method that is in place on the dataLayer array when this tag runs. The wrapper
    // installed further down forwards every call to it.
    var oldPush = window[dataLayerName].push;
    
    // Returns a new array holding every item currently in the dataLayer array except the
    // last one. The most recent item is left out on purpose: the push wrapper appends it to
    // the history separately.
    var backfillHistory = function() {
      var source    = window[dataLayerName];
      var lastIndex = source.length - 1;
      var copied    = [];
      var idx;
      for (idx = 0; idx < lastIndex; idx += 1) {
        copied[idx] = source[idx];
      }
      return copied;
    };
    
    // Method to check if a value is a plain object (an object literal or JSON-style object).
    // Returns false for null, primitives, arrays, dates, DOM nodes, window objects and
    // instances of custom constructors; returns true otherwise.
    // From https://bit.ly/2A3Fuqe
    //
    // Own-property checks go through Object.prototype.hasOwnProperty.call() instead of
    // value.hasOwnProperty(), so objects without a prototype (Object.create(null)) and objects
    // that carry their own "hasOwnProperty" key are classified correctly instead of throwing
    // or being rejected.
    var isPlainObject = function(value) {
      var hasOwn = Object.prototype.hasOwnProperty;
      if (!value || typeof value !== 'object' ||    // Nulls, dates, etc.
          value.nodeType ||                             // DOM nodes.
          value === value.window) {                      // Window objects.
        return false;
      }
      try {
        // An inherited constructor whose prototype lacks isPrototypeOf is not Object itself,
        // so the value is an instance of something else (Array, Date, a custom class, ...).
        if (value.constructor && !hasOwn.call(value, 'constructor') &&
            !hasOwn.call(value.constructor.prototype, 'isPrototypeOf')) {
          return false;
        }
      } catch (e) {
        // Some host objects throw on property access; treat them as non-plain.
        return false;
      }
      // The empty loop leaves "key" holding the last enumerated key. The check below requires
      // that key to be an own property, which rejects objects with inherited enumerable keys.
      var key;
      for (key in value) {}
      return key === undefined || hasOwn.call(value, key);
    };
    
    // Method to merge the stored data model and the history model together.
    // From https://bit.ly/2FrPQWL
    //
    // Recursively copies every own enumerable property of storedModel into historyModel,
    // mutating historyModel in place; nothing is returned.
    //   - Array values are merged index by index into an array on the target (a non-array
    //     target value is replaced by a new empty array first).
    //   - Plain-object values are merged key by key into a plain object on the target (a
    //     non-plain target value is replaced by a new empty object first).
    //   - Every other value overwrites the target value.
    //
    // The "__proto__" key is skipped: JSON.parse() creates it as an own property, and merging
    // it would write into Object.prototype. The own-property check uses
    // Object.prototype.hasOwnProperty.call() so prototype-less source objects work as well.
    var mergeStates = function(storedModel, historyModel) {
      var hasOwn = Object.prototype.hasOwnProperty;
      for (var property in storedModel) {
        if (property !== '__proto__' && hasOwn.call(storedModel, property)) {
          var storedProperty = storedModel[property];
          if (Array.isArray(storedProperty)) {
            if (!Array.isArray(historyModel[property])) historyModel[property] = [];
            mergeStates(storedProperty, historyModel[property]);
          } else if (isPlainObject(storedProperty)) {
            if (!isPlainObject(historyModel[property])) historyModel[property] = {};
            mergeStates(storedProperty, historyModel[property]);
          } else {
            historyModel[property] = storedProperty;
          }
        }
      }
    };
    
    window[dataLayerName].push = function() {
      try {
        
        // Build the history array from local storage
        window._dataLayerHistory = JSON.parse(
          window.localStorage.getItem('_dataLayerHistory') || 
          '{"timeout": null, "history": [], "model": {}}'
        );
        
        // Initial settings
        var timeNow     = new Date().getTime(),
            states      = [].slice.call(arguments, 0),
            results     = oldPush.apply(window[dataLayerName], states),
            oDataLayer  = window[dataLayerName],
            dHistory    = window._dataLayerHistory,
            oDataModel  = window.google_tag_manager[{{Container ID}}].dataLayer.get({split: function() { return []; }});
        
        // Method to reset the history array to the current page state only
        dHistory.reset = function() {
          dHistory.timeout = null;
          dHistory.history = backfillHistory();
          dHistory.model = {};
          mergeStates(oDataModel, dHistory.model);
          window.localStorage.setItem('_dataLayerHistory', JSON.stringify(dHistory));
        };
      
        // From https://bit.ly/2A2ZcCG
        dHistory.model.get = function(key) {
          var target = dHistory.model;
          var split = key.split('.');
          for (var i = 0; i < split.length; i++) {
            if (target[split[i]] === undefined) return undefined;
            target = target[split[i]];
          }
          return target;
        };

        // Add history if this is the initialization event itself
        if (initialLoad) {
          dHistory.history = dHistory.history.concat(backfillHistory());
          initialLoad = false;
        }
        
        // If timeout is reached, reset the history array
        if (dHistory.hasOwnProperty('timeout') && dHistory.timeout < timeNow) {
          dHistory.reset();
        }
        
        // Push latest item from dataLayer into the history array
        dHistory.history.push(oDataLayer[oDataLayer.length-1]);
        
        // Merge GTM's data model with the history model
        mergeStates(oDataModel, dHistory.model);
        
        // Update the timeout
        dHistory.timeout = timeNow + timeout;
        
        // Write the new history into localStorage
        window.localStorage.setItem('_dataLayerHistory', JSON.stringify(dHistory));
        return results;
      } catch(e) {
        console.log('Problem interacting with dataLayer history: ' + e);
        var states  = [].slice.call(arguments, 0),
            results = oldPush.apply(window[dataLayerName], states);
        return results;
      }
    };
  })();
</script>

In the very beginning of the snippet, there are two variables whose values you need to modify.

var timeout = 30*60*1000;

The line above establishes the timeout for the local storage. This means that once the user hasn’t interacted with dataLayer for as long as you set in the timeout, the history will be reset to start from the current page. The default value is 30 minutes, and if you want to modify it make sure you set the timeout in milliseconds, as in the default value.

var dataLayerName = 'dataLayer';

The line above is the name of the dataLayer array that Google Tag Manager uses, and it defaults to the name used in the unmodified container snippet. If you’ve changed the dataLayer name in the container snippet, make sure it’s updated here, too.

Other tag settings

In addition to copy-pasting the code above, set the Tag Priority value to 9999 or any number that’s higher than any other Tag Priority for tags firing on the All Pages trigger.

Trigger

Set this Custom HTML tag to fire on the All Pages trigger. You want it to be the first tag that fires on the page. Naturally, if you have tags firing on an event that’s pushed into dataLayer before the Google Tag Manager container snippet, you need to make sure this tag fires on that trigger instead.

How it works

Whenever something is pushed into dataLayer, it is also pushed into a new array under window._dataLayerHistory. This is a global object, and you can access it from anywhere on the page, including GTM’s Custom HTML tags and Custom JavaScript variables.

In addition to each pushed item being added to this history array, the whole history object is written into window.localStorage on every push, where it persists across pages until the user decides to clear their browser storage.

In short, there’s a new window._dataLayerHistory object that contains information about all the items pushed into dataLayer across pages, and you can access this object from any JavaScript context on the page.

The history array

The array itself, representing the history of the window.dataLayer array, can be found at window._dataLayerHistory.history.

When the Custom HTML tag is first loaded on any page, this history array is first back-filled with items from the current window.dataLayer that were pushed before the Custom HTML tag was fired. This is necessary, because the Custom HTML tag creates its own .push() listener only when it fires, at which point the window.dataLayer array will already contain items.

One quirky thing you might notice is that if there’s a window.dataLayer.push() call taking place in a tag sequence, the object pushed into the history array will not contain the gtm.uniqueEventId key. There’s not much I can do about this, unfortunately, but it shouldn’t be a big deal.

The data model

If you’re not familiar with GTM’s data model, it’s essentially a lookup table to which GTM copies and merges the key-value pairs you push into the dataLayer array.

It’s important to understand this distinction, because GTM uses the internal data model for Data Layer variables.

The data model is also persisted from page to page in window._dataLayerHistory.model. This object has a get() method you can use to fetch data model values, just like GTM’s own native interface does:

window._dataLayerHistory.model.get('someOldVariableFromAPreviousPage');

This might be useful. For example, if your site writes to dataLayer something like {userLoggedIn: true} when the user logs into the site, but it only does this when the user actually logs in, you can fetch this value on later pages by querying the history object:

window._dataLayerHistory.model.get('userLoggedIn');

The history model applies the same type of recursive merge that GTM does with its internal data model. This might lead to unexpected outcomes with objects and arrays, so be sure to read up on recursive merge before moving on.

Reset

You can also reset the history by executing this command:

window._dataLayerHistory.reset();

This nulls the timeout and resets the history array and history model to the states of the current page, thus removing any history from both. It also resets the object stored in browser storage to this reset state.

Applications

There are many things you could do with a persistent dataLayer and data model. Here are some examples.

Get number of pages loaded

To identify how many pages the user has visited, you could have a Custom JavaScript variable that does this:

function() {
  return window._dataLayerHistory.history.filter(function(obj) { return obj.event === 'gtm.js'; }).length;
}

This returns the number of times that the gtm.js event has been pushed into dataLayer, and you can use this as a reasonably good proxy for determining how many pages the user has visited. Note that if you use either gtag.js or Google Optimize, this variable might not return an accurate result.

Check the entire dataLayer history for some key or value

Here’s an extension of this solution I wrote for the window.dataLayer array (i.e. the dataLayer of the current page only). With this, you can search the entire history of the dataLayer array for a given key-value pair. This is what the modified Custom JavaScript variable looks like:

function() {
  // Modify the searchObject below.
  //
  // Add each key-value pair you want to look for directly into the searchObject object. Use
  // strings for keys. 
  //
  // The variable will look for any key-value pair you specify, and return true if any one of them
  // is found. If you use dot notation, the variable will try to find a key with this name first,
  // after which it will parse the nested structure looking for a match.
  var searchObject = {
    'user.consentGiven': 'false'
  };
  
  var dataLayerName = '_dataLayerHistory';
  
  // Don't edit anything below this line.
  var getValueForObjectString = function(obj, key) {
    return key.split(".").reduce(function(o, x) {
        return (typeof o == "undefined" || o === null) ? o : o[x];
    }, obj);
  };
  
  return window[dataLayerName].history.filter(function(obj) {
    var found = false;
    var prop;
    
    for (prop in searchObject) {
      if (obj[prop] == searchObject[prop] || getValueForObjectString(obj, prop) == searchObject[prop]) {
        found = true;
      }
    }
    return found;
  }).length > 0;
}

Summary

You can use this script to persist the dataLayer array as well as Google Tag Manager’s internal data model from one page to the next.

It’s not foolproof. For example, it doesn’t understand command arrays or command functions, and it doesn’t understand any manual .set() commands you run against GTM’s own, internal data model. (Note, if any of the terms in this paragraph were alien to you, please read my article on GTM’s internal data model).

As always, this was more a tech demo than a turnkey solution. Please let me know in the comments if you have uses for this kind of a solution. Also, if you have improvement suggestions, let me know of those, too!