UPDATE 4 June 2020: Instead of copying the Custom HTML code from the article, please load it from the GitHub Gist instead.
Four years ago, I wrote an article on how to persist GTM’s dataLayer
from page to page. Unfortunately, the solution was a bit clumsy, requiring you to give specific commands for the interactions, which made it really unwieldy in the long run. Google Tag Manager still doesn’t offer us a native way to persist the dataLayer
array or its internal data model from one page to the other, so I thought it was about time I revisit this idea.
This time, there won’t be an API to interact with. Instead, the solution will simply store the contents of the dataLayer
array AND the internal data model from page to page. The stored data is purged only once the user has not interacted with GTM’s dataLayer
for a given amount of time.
The Simmer Newsletter
Follow this link to subscribe to the Simmer Newsletter! Stay up-to-date with the latest content from Simo Ahava and the Simmer online course platform.
Setting it up
To set it up, you need to create a Custom HTML tag, in which you’ll copy the following code. If you like, you can also copy the code from this gist.
UPDATE 4 June 2020: Please copy the code from the gist link above rather than from the article below. The gist is kept up-to-date, and has stability fixes that help resolve some issues users have been having with the solution.
The Custom HTML tag code
<script>
(function() {
// How long the stored dataLayer history is kept without any dataLayer interaction before
// it is purged. The value must be in milliseconds; the default is 30 minutes.
var timeout = 30 * 60 * 1000;
// Name of the dataLayer array. Change this only if your GTM container snippet defines
// another name for the dataLayer array.
var dataLayerName = 'dataLayer';
// Don't change anything below.
// Reference to push() as it exists before this tag replaces it.
var oldPush = window[dataLayerName].push;
// Starts as true; the push wrapper sets it to false once it has back-filled the history.
var initialLoad = true;
// Returns a new array holding every item currently in the dataLayer array except the
// last one. The push wrapper appends the last (most recent) item to the history itself,
// so leaving it out here avoids recording it twice.
var backfillHistory = function() {
  var source = window[dataLayerName];
  return Array.prototype.slice.call(source, 0, source.length - 1);
};
// Method to check if object is a plain object.
// From https://bit.ly/2A3Fuqe
//
// Returns true for object literals and null-prototype objects; returns false for
// null/primitives, DOM nodes, window objects, and objects built by any other constructor
// (arrays, dates, class instances, ...).
var isPlainObject = function(value) {
  // Borrow hasOwnProperty from Object.prototype instead of calling it on the value:
  // the value may have no prototype (Object.create(null)) or may carry its own
  // "hasOwnProperty" key, in which case value.hasOwnProperty(...) throws.
  var hasOwn = Object.prototype.hasOwnProperty;
  if (!value || typeof value !== 'object' || // Nulls and primitives.
      value.nodeType || // DOM nodes.
      value === value.window) { // Window objects.
    return false;
  }
  try {
    // An inherited constructor whose prototype lacks its own isPrototypeOf is not Object,
    // so the value was created by some other constructor.
    if (value.constructor && !hasOwn.call(value, 'constructor') &&
        !hasOwn.call(value.constructor.prototype, 'isPrototypeOf')) {
      return false;
    }
  } catch (e) {
    // Accessing the constructor threw; treat the value as not plain.
    return false;
  }
  // Walk to the last enumerated key; the value counts as plain only if that key is its own
  // (or if there are no enumerable keys at all).
  var key;
  for (key in value) {}
  return key === undefined || hasOwn.call(value, key);
};
// Method to merge the stored data model and the history model together.
// From https://bit.ly/2FrPQWL
//
// Recursively copies every own enumerable property of storedModel into historyModel
// (historyModel is mutated in place, nothing is returned):
//   - arrays are merged index by index into an array on the target,
//   - plain objects are merged key by key into a plain object on the target,
//   - every other value overwrites the target's value.
var mergeStates = function(storedModel, historyModel) {
  // Borrowed so that sources without a prototype, or with their own "hasOwnProperty"
  // key, can still be merged.
  var hasOwn = Object.prototype.hasOwnProperty;
  for (var property in storedModel) {
    if (!hasOwn.call(storedModel, property)) continue;
    // An own "__proto__" key (JSON.parse can create one) would make historyModel[property]
    // resolve to Object.prototype, and the recursion below would then write into the
    // prototype shared by every object on the page. Skip it.
    if (property === '__proto__') continue;
    var storedProperty = storedModel[property];
    if (Array.isArray(storedProperty)) {
      if (!Array.isArray(historyModel[property])) historyModel[property] = [];
      mergeStates(storedProperty, historyModel[property]);
    } else if (isPlainObject(storedProperty)) {
      if (!isPlainObject(historyModel[property])) historyModel[property] = {};
      mergeStates(storedProperty, historyModel[property]);
    } else {
      historyModel[property] = storedProperty;
    }
  }
};
window[dataLayerName].push = function() {
try {
// Build the history array from local storage
window._dataLayerHistory = JSON.parse(
window.localStorage.getItem('_dataLayerHistory') ||
'{"timeout": null, "history": [], "model": {}}'
);
// Initial settings
var timeNow = new Date().getTime(),
states = [].slice.call(arguments, 0),
results = oldPush.apply(window[dataLayerName], states),
oDataLayer = window[dataLayerName],
dHistory = window._dataLayerHistory,
oDataModel = window.google_tag_manager[{{Container ID}}].dataLayer.get({split: function() { return []; }});
// Method to reset the history array to the current page state only
dHistory.reset = function() {
dHistory.timeout = null;
dHistory.history = backfillHistory();
dHistory.model = {};
mergeStates(oDataModel, dHistory.model);
window.localStorage.setItem('_dataLayerHistory', JSON.stringify(dHistory));
};
// From https://bit.ly/2A2ZcCG
dHistory.model.get = function(key) {
var target = dHistory.model;
var split = key.split('.');
for (var i = 0; i < split.length; i++) {
if (target[split[i]] === undefined) return undefined;
target = target[split[i]];
}
return target;
};
// Add history if this is the initialization event itself
if (initialLoad) {
dHistory.history = dHistory.history.concat(backfillHistory());
initialLoad = false;
}
// If timeout is reached, reset the history array
if (dHistory.hasOwnProperty('timeout') && dHistory.timeout < timeNow) {
dHistory.reset();
}
// Push latest item from dataLayer into the history array
dHistory.history.push(oDataLayer[oDataLayer.length-1]);
// Merge GTM's data model with the history model
mergeStates(oDataModel, dHistory.model);
// Update the timeout
dHistory.timeout = timeNow + timeout;
// Write the new history into localStorage
window.localStorage.setItem('_dataLayerHistory', JSON.stringify(dHistory));
return results;
} catch(e) {
console.log('Problem interacting with dataLayer history: ' + e);
var states = [].slice.call(arguments, 0),
results = oldPush.apply(window[dataLayerName], states);
return results;
}
};
})();
</script>
In the very beginning of the snippet, there are two variables whose values you may need to modify.
var timeout = 30*60*1000;
The line above establishes the timeout for the local storage. This means that once the user hasn’t interacted with dataLayer
for as long as you set in the timeout, the history will be reset to start from the current page. The default value is 30 minutes, and if you want to modify it make sure you set the timeout in milliseconds, as in the default value.
var dataLayerName = 'dataLayer';
The line above is the name of the dataLayer
array that Google Tag Manager uses, and it defaults to the unmodified container snippet. If you’ve changed the dataLayer
name in the container snippet, make sure it’s updated here, too.
Other tag settings
In addition to copy-pasting the code above, set the Tag Priority value to 9999
or any number that’s higher than any other Tag Priority for tags firing on the All Pages trigger.
Trigger
Set this Custom HTML tag to fire on the All Pages trigger. You want it to be the first tag that fires on the page. Naturally, if you have tags firing on an event that’s pushed into dataLayer
before the Google Tag Manager container snippet, you need to make sure this tag fires on that trigger instead.
How it works
Whenever something is pushed into dataLayer
, it is also pushed into a new array under window._dataLayerHistory
. This is a global object, and you can access it from anywhere on the page, including GTM’s Custom HTML tags and Custom JavaScript variables.
Each time an item is added to this history array, the whole history object is also written into the window.localStorage
structure, which persists across pages until the user decides to clear their browser storage.
In short, there’s a new window._dataLayerHistory
object that contains information about all the items pushed into dataLayer
across pages, and you can access this object from any JavaScript context on the page.
The history array
The array itself, representing the history of the window.dataLayer
array, can be found at window._dataLayerHistory.history
.
When the Custom HTML tag is first loaded on any page, this history array is first back-filled with items from the current window.dataLayer
that were pushed before the Custom HTML tag was fired. This is necessary, because the Custom HTML tag creates its own .push()
listener only when it fires, at which point the window.dataLayer
array will already contain items.
One quirky thing you might notice is that if there’s a window.dataLayer.push()
call taking place in a tag sequence, the object pushed into the history array will not contain the gtm.uniqueEventId
key. There’s not much I can do about this, unfortunately, but it shouldn’t be a big deal.
The data model
If you’re not familiar with GTM’s data model, it’s essentially a lookup table to which GTM copies and merges the key-value pairs you push into the dataLayer
array.
It’s important to understand this distinction, because GTM uses the internal data model for Data Layer variables.
The data model is also persisted from page to page in window._dataLayerHistory.model
. This object has a get()
method you can use to fetch data model values, just like GTM’s own native interface does:
window._dataLayerHistory.model.get('someOldVariableFromAPreviousPage');
This might be useful. For example, if your site writes to dataLayer
something like {userLoggedIn: true}
when the user logs into the site, but it only does this when the user actually logs in, you can fetch this value on later pages by querying the history object:
window._dataLayerHistory.model.get('userLoggedIn');
The history model applies the same type of recursive merge that GTM does with its internal data model. This might lead to unexpected outcomes with objects and arrays, so be sure to read up on recursive merge before moving on.
Reset
You can also reset the history by executing this command:
window._dataLayerHistory.reset();
This nulls the timeout and resets the history array and history model to the states of the current page, thus removing any history from both. It also resets the object stored in browser storage to this reset state.
Applications
There are many things you could do with a persistent dataLayer
and data model. Here are some examples.
Get number of pages loaded
To identify how many pages the user has visited, you could have a Custom JavaScript variable that does this:
function() {
return window._dataLayerHistory.history.filter(function(obj) { return obj.event === 'gtm.js'; }).length;
}
This returns the number of times that the gtm.js
event has been pushed into dataLayer
, and you can use this as a reasonably good proxy for determining how many pages the user has visited. Note that if you use either gtag.js or Google Optimize, this variable might not return an accurate result.
Check the entire dataLayer
history for some key or value
Here’s an extension of this solution I wrote for the window.dataLayer
array (i.e. the dataLayer
of the current page only). With this, you can search the entire history of the dataLayer
array for a given key-value pair. This is what the modified Custom JavaScript variable looks like:
function() {
// Modify the searchObject below.
//
// Add each key-value pair you want to look for directly into the searchObject object. Use
// strings for keys.
//
// The variable will look for any key-value pair you specify, and return true if any one of them
// is found. If you use dot notation, the variable will try to find a key with this name first,
// after which it will parse the nested structure looking for a match.
var searchObject = {
'user.consentGiven': 'false'
};
var dataLayerName = '_dataLayerHistory';
// Don't edit anything below this line.
var getValueForObjectString = function(obj, key) {
return key.split(".").reduce(function(o, x) {
return (typeof o == "undefined" || o === null) ? o : o[x];
}, obj);
};
return window[dataLayerName].history.filter(function(obj) {
var found = false;
var prop;
for (prop in searchObject) {
if (obj[prop] == searchObject[prop] || getValueForObjectString(obj, prop) == searchObject[prop]) {
found = true;
}
}
return found;
}).length > 0;
}
Summary
You can use this script to persist the dataLayer
array as well as Google Tag Manager’s internal data model from one page to the next.
It’s not foolproof. For example, it doesn’t understand command arrays or command functions, and it doesn’t understand any manual .set()
commands you run against GTM’s own, internal data model. (Note, if any of the terms in this paragraph were alien to you, please read my article on GTM’s internal data model).
As always, this was more a tech demo than a turnkey solution. Please let me know in the comments if you have uses for this kind of a solution. Also, if you have improvement suggestions, let me know of those, too!