Skip to content

Instantly share code, notes, and snippets.

@coffee-mug
Created November 2, 2021 14:26
Show Gist options
  • Save coffee-mug/142d2dd29c7222de88ffd15cb5086846 to your computer and use it in GitHub Desktop.
Save coffee-mug/142d2dd29c7222de88ffd15cb5086846 to your computer and use it in GitHub Desktop.
const fs = require('fs');
const path = require('path');
// Usage: node main.js [optional har file path] [optional JSON dataLayer file path]
// Iterates over all files in the folder passed to main(), pairing them by name and then
// performing the audit: hostname extraction, flagging dataLayer values observed in requests
// and exporting the results as a CSV file in the current folder.
// NOTE(review): main() always hands explicit file paths to compareRequestsAndDatalayer(),
// so the two optional command-line paths are only read as a fallback when that function
// is called without arguments.
// REQUIREMENTS:
// * paired files must share the same base name, e.g. fnac_home.json and fnac_home.har
// Change the folder name below to the folder hosting your .har and .json files.
main("infos");
/**
 * Audit every HAR / dataLayer pair found in `folderName`.
 *
 * Files are paired by base name (`fnac_home.har` with `fnac_home.json`). Each
 * pair is handed to compareRequestsAndDatalayer(); the rows it returns are
 * written, preceded by a header row, to `audit.csv` in the current working
 * directory. Finally mergeDatalayerFiles() is called.
 *
 * @param {string} folderName - Directory holding the .har and .json files.
 * @returns {void}
 */
function main(folderName) {
  console.log("Debug: ", folderName);

  const files = fs.readdirSync(folderName);
  if (files.length === 0) {
    console.log("No files to parse, exit.");
    return;
  }
  // Every .har needs its .json twin, so an odd count means a file is missing.
  if (files.length % 2 !== 0) {
    console.log(`Wrong number of files (${files.length}), count should be even`);
    return;
  }
  console.log(`${files.length} files to group`);

  // Sort so pairs are processed (and reported) in lexicographic order.
  files.sort();

  // Pair by base name instead of by position in the sorted list: a single
  // unmatched file would otherwise shift the alignment and silently drop
  // every valid pair that follows it.
  const jsonFiles = new Set(files.filter(file => file.endsWith(".json")));
  const filesTuples = [];
  files
    .filter(file => file.endsWith(".har"))
    .forEach(harFile => {
      const jsonFile = `${harFile.slice(0, -".har".length)}.json`;
      if (jsonFiles.has(jsonFile)) {
        filesTuples.push([harFile, jsonFile]);
      }
    });

  let results = [];
  filesTuples.forEach(([harFile, jsonFile]) => {
    // debug
    console.log(`Comparing ${harFile} with ${jsonFile}`);
    const output = compareRequestsAndDatalayer(
      path.join(folderName, harFile),
      path.join(folderName, jsonFile)
    );
    results = results.concat(output);
  });

  // Add headers for readability. unshift mutates the array in place.
  results.unshift(["Parent page", "Hit", "Hit Hostname", "DataLayer values present in the hit"]);
  // debug
  console.log(results.slice(0, 4));

  // Export a CSV list of origin url, hit, hit domain, dataLayer values present in hit.
  // Fields are escaped explicitly so a comma or quote inside a value cannot
  // shift the columns.
  const csv = results.map(row => row.map(toCsvField).join(",")).join("\n");
  fs.writeFileSync("audit.csv", csv);

  // Consolidate the JSON files into a single JSON object. The folder is passed
  // along so the merge can target the same directory; a callee declared
  // without parameters simply ignores the extra argument.
  mergeDatalayerFiles(folderName);
}

/**
 * Serialise one value as a CSV field (RFC 4180 quoting).
 * null/undefined become an empty field, like Array.prototype.join does.
 * Fields without comma, quote or line break are returned unchanged.
 *
 * @param {*} value
 * @returns {string}
 */
function toCsvField(value) {
  const text = value == null ? "" : String(value);
  if (!/[",\r\n]/.test(text)) {
    return text;
  }
  return `"${text.replace(/"/g, '""')}"`;
}
// flattenObject recursively walks a value of arbitrary depth and returns
// `output` with every primitive it met stored at the root level, keyed by the
// name of the property that directly held it. Useful to bring a nested
// digitalData object down to a single level.
//
// - Plain objects are descended into; their own key names become the new keys.
// - Arrays are descended into with the key of the property holding the array.
// - Primitives (null included) are stored; when the same key is met again the
//   values are accumulated in an array, in encounter order.
// - Set, Map and other exotic objects are not handled specially for now.
//
// Parameters:
//   object - value to flatten
//   key    - key under which a primitive `object` is stored (null at the root)
//   output - accumulator, mutated in place and returned
//
// Example:
// const nestedObject = { a: { b: 1 }, c: { d: { e: 3 } }, f: [1, 2, { g: "lol" }] }
// flattenObject(nestedObject, null, {})
// => { b: 1, e: 3, f: [1, 2], g: "lol" }
function flattenObject(object, key, output) {
  // An array? Recurse on each element, keeping the parent's key.
  if (Array.isArray(object)) {
    object.forEach(value => flattenObject(value, key, output));
    return output;
  }

  // An object? Recurse on each property. `typeof null` is "object" too, so
  // null is excluded here and stored as a leaf value below instead of
  // crashing in Object.keys().
  if (object !== null && typeof object === "object") {
    Object.keys(object).forEach(property => flattenObject(object[property], property, output));
    return output;
  }

  // A primitive. Only own properties count as "already seen": a plain
  // `output[key]` lookup would also find inherited members such as
  // `toString` and wrongly merge them with the value.
  if (!Object.prototype.hasOwnProperty.call(output, key)) {
    output[key] = object;
  } else if (Array.isArray(output[key])) {
    output[key].push(object);
  } else {
    output[key] = [output[key], object];
  }
  return output;
}
/**
 * Merge every .json file of `folder` into a single object and write it,
 * pretty-printed, to `merged.json` in the current working directory.
 *
 * Each file's content is tagged with an "AUDITED_PAGE" label derived from the
 * file name (extension removed, underscores turned into spaces) and then
 * copied onto the merged object with Object.assign, so when several files
 * share a key (AUDITED_PAGE included) the file read last wins.
 * Array values are written as JSON-encoded strings.
 *
 * @param {string} [folder="infos"] - Directory containing the .json files.
 * @returns {void}
 */
function mergeDatalayerFiles(folder = "infos") {
  const merged = {};

  fs.readdirSync(folder)
    .filter(file => file.endsWith(".json"))
    .forEach(file => {
      const content = JSON.parse(fs.readFileSync(path.join(folder, file), "utf8"));
      // Strip only the trailing extension and replace every underscore,
      // not just the first one, to get a readable page label.
      content["AUDITED_PAGE"] = path.basename(file, ".json").replace(/_/g, " ");
      Object.assign(merged, content);
    });

  // Arrays are serialised to a string so each key keeps a single scalar value.
  const serialized = JSON.stringify(
    merged,
    (key, value) => (Array.isArray(value) ? JSON.stringify(value) : value),
    2
  );
  fs.writeFileSync("merged.json", serialized);
}
/**
 * Cross-reference the requests recorded in a HAR file with the values of a
 * dataLayer JSON file.
 *
 * Requests whose URL path ends with a static-asset extension (css, html, js,
 * images, fonts, documents, php, json) are dropped. Each remaining request
 * produces one row:
 *   [ parent page title,
 *     decoded hit URL (commas replaced by '-'),
 *     hit hostname without a leading "www.",
 *     "key=value" pairs, joined by ';', of the dataLayer entries whose value
 *     appears in the decoded hit URL ]
 *
 * Only top-level dataLayer entries whose value has a length greater than 1 and
 * is not exactly "true" / "false" are considered, to avoid matching noise.
 *
 * @param {string} [reqs] - Path to the .har file; falls back to process.argv[2].
 * @param {string} [dL] - Path to the dataLayer .json file; falls back to process.argv[3].
 * @returns {Array<Array<string>>} One row per retained request.
 * @throws {Error} When no HAR path is available.
 */
function compareRequestsAndDatalayer(reqs, dL) {
  const requestsPath = reqs || process.argv[2];
  const dataLayerPath = dL || process.argv[3];
  if (!requestsPath) {
    throw new Error("No HAR file path given (expected as argument or as first CLI parameter)");
  }

  const parsedRequests = JSON.parse(fs.readFileSync(requestsPath, "utf8"));
  const dataLayer = dataLayerPath ? JSON.parse(fs.readFileSync(dataLayerPath, "utf8")) : null;

  // We're interested in PII or other juicy dataLayer values, so filter out dumb
  // data like single digits or "true" / "false" that may be visible in hits
  // without being taken from the dataLayer. null values are skipped rather
  // than crashing on `.length`; the regex is anchored so that values merely
  // containing "true" or "false" are kept.
  const dataLayerEntries = dataLayer
    ? Object.entries(dataLayer).filter(
        ([, value]) => value != null && value.length > 1 && !/^(true|false)$/.test(value)
      )
    : [];

  const entries = parsedRequests.log.entries;
  console.log("Requests length before filtering out extensions\n", entries.length);
  const filtered = entries.filter(entry => !hasStaticAssetExtension(entry.request.url));
  console.log("Requests length after filtering out extensions\n", filtered.length);

  // `pages` is optional in a HAR file; fall back to an empty title.
  const pages = parsedRequests.log.pages;
  const parentPage = pages && pages.length > 0 ? pages[0].title : "";

  return filtered.map(entry => {
    const decodedUrl = decodeUrlSafely(entry.request.url);
    return [
      // Parent page
      parentPage,
      // Hit URL
      decodedUrl.replace(/,/g, "-"),
      // Hit hostname
      hostnameOf(entry.request.url),
      // Datalayer values present in the hit
      dataLayerEntries
        .filter(([, value]) => decodedUrl.includes(value))
        .map(tuple => tuple.join("="))
        .join(";"),
    ];
  });
}

/**
 * decodeURIComponent that returns the input unchanged when it contains a
 * malformed percent sequence (e.g. a stray "%") instead of throwing.
 */
function decodeUrlSafely(rawUrl) {
  try {
    return decodeURIComponent(rawUrl);
  } catch (err) {
    // URIError: keep the raw URL so one odd hit does not abort the whole audit.
    return rawUrl;
  }
}

/**
 * Tell whether the path of `rawUrl` ends with a static-asset extension.
 * Only the path is inspected: the query string of a tracking hit often embeds
 * a page URL ("...?dl=https://site/page.html") that must not disqualify it.
 */
function hasStaticAssetExtension(rawUrl) {
  let pathname;
  try {
    pathname = new URL(rawUrl).pathname;
  } catch (err) {
    // Not an absolute URL: approximate the path by cutting query and fragment.
    pathname = String(rawUrl).split(/[?#]/)[0];
  }
  return /\.(css|html|js|jpe?g|png|woff2|svg|pdf|docx?|php|json)$/.test(decodeUrlSafely(pathname));
}

/** Hostname of `rawUrl` without a leading "www.", or "" when it cannot be parsed. */
function hostnameOf(rawUrl) {
  try {
    return new URL(rawUrl).hostname.replace(/^www\./, "");
  } catch (err) {
    return "";
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment