Skip to content

Instantly share code, notes, and snippets.

@zeraphie
Last active October 16, 2019 11:56
Show Gist options
  • Save zeraphie/2fda8a0cb1736df33602d7ddef0ae297 to your computer and use it in GitHub Desktop.
Save zeraphie/2fda8a0cb1736df33602d7ddef0ae297 to your computer and use it in GitHub Desktop.
Add a service worker that can read a sitemap

Service Workers with a Sitemap Parser

This is a collection of ES2017 classes that use Service Workers to add offline support for a site. Because of the ES2017 syntax a transpiler is needed; webpack with babel and the env preset is used here. This example should work for WordPress sites as well.

Installation

Because DOMParser is not included in Service Workers, you will need to install the npm package xmldom

npm install xmldom --save-dev

With Service Workers

The resulting links are meant to be added to the service worker so that they are cached in a way that keeps the most up-to-date data available, so:

If there is a connection

  1. Fetch the list on the network
  2. Override the cache with the new list
  3. Cache all the links in the list

If there is no connection

  1. Fetch the list in the cache
  2. Cache all the links in the list

Example of using this in a Service Worker

Please note that there must be an offline.js in the webroot of the project. It is fine for that file to pull in another (compiled) script via importScripts inside the Service Worker. Here's an example using the webpack and gulp build tools:

/build/js/master.js

// Register the service worker, but only where the API exists and the page is
// served over HTTPS.
if('serviceWorker' in navigator && window.location.protocol === 'https:'){
    // register() reports failure by rejecting the promise it returns, which a
    // surrounding try/catch never sees, so the failure is handled with
    // .catch() instead.
    navigator.serviceWorker
        .register('/offline.js')
        .then(() => {
            console.log('Service Worker Registered');
        })
        .catch(e => {
            console.log(e);
        });
}

/offline.js

// Look up the compiled (hashed) service worker script in the webpack manifest
// and import it into this worker.
// NOTE(review): importScripts() is called asynchronously here; browsers may
// refuse imports made after the worker's initial evaluation/installation, and
// event listeners added by a late import may not be honoured - confirm this
// works in the target browsers.
fetch('<url/to/assets>/webpack/manifest.json')
    .then(response => {
        // A 404/500 page is not a manifest; fail loudly instead of trying to
        // parse it as JSON.
        if(!response.ok){
            throw new Error(`Failed to load webpack manifest: ${response.status}`);
        }

        return response.json();
    })
    .then(data => {
        const script = data['offline.js'];

        if(typeof script !== 'string'){
            throw new Error('No "offline.js" entry found in the webpack manifest');
        }

        importScripts(script);
    })
    .catch(error => {
        // Without this handler every failure above is an unhandled rejection
        console.log('[ServiceWorker] Could not load the compiled worker', error);
    });
import OfflineHelper from './OfflineHelper';
/*==============================================================================
Config variables

cacheName     - cache opened by the install handler to store the app shell
                (placeholder value, replace it with a real name)
dataCacheName - cache the fetch handler writes network responses into; derived
                from cacheName
Both names are the ones the activate handler keeps when it deletes caches.
==============================================================================*/
const cacheName = '<cache name here>';
const dataCacheName = `data-${cacheName}`;
/*==============================================================================
On installing the service worker, open the cache and add all the files to the
cache
==============================================================================*/
self.addEventListener('install', e => {
    console.log('[ServiceWorker] Install');

    const precache = async () => {
        const cache = await caches.open(cacheName);

        console.log('[ServiceWorker] Caching app shell');

        const assets = await OfflineHelper.fetchAssetsToCache();

        // Guard against a non-array result: cache.addAll(undefined) throws a
        // TypeError, which would fail the whole installation.
        // Note that addAll is all-or-nothing: one failing URL rejects the lot.
        return cache.addAll(Array.isArray(assets) ? assets : []);
    };

    // Keep the worker in the installing phase until pre-caching has finished
    e.waitUntil(precache());
});
/*==============================================================================
On activating the service worker, remove old caches and take control of the
already open pages
==============================================================================*/
self.addEventListener('activate', e => {
    console.log('[ServiceWorker] Activate');

    // Delete every cache that is not one of the two this worker uses
    const removeOldCaches = caches.keys().then(keyList => {
        const staleKeys = keyList.filter(key => {
            return key !== cacheName && key !== dataCacheName;
        });

        return Promise.all(staleKeys.map(key => {
            console.log('[ServiceWorker] Removing old cache', key);

            return caches.delete(key);
        }));
    });

    // The return value of an event listener is ignored, so claim() has to be
    // handed to waitUntil() for the activation to wait on it and for its
    // rejection to be observed.
    e.waitUntil(Promise.all([
        removeOldCaches,
        self.clients.claim()
    ]));
});
/*==============================================================================
Use the network first, falling back to the cache, to load the resource. Every
successful response refreshes the data cache so the offline copy stays current
https://jakearchibald.com/2014/offline-cookbook/#network-falling-back-to-cache
==============================================================================*/
self.addEventListener('fetch', e => {
    /*--------------------------------------
    Guard against extensions and other origins. The origins are compared
    exactly: a substring test also matches foreign URLs that merely contain
    this origin somewhere (e.g. in a query string)
    --------------------------------------*/
    if(new URL(e.request.url).origin !== self.location.origin){
        return;
    }

    /*--------------------------------------
    Only GET responses are cacheable; storing the response of a POST under its
    URL would later be served for plain GET requests of that URL
    --------------------------------------*/
    if(e.request.method !== 'GET'){
        return;
    }

    console.log('[ServiceWorker] Fetch', e.request.url);

    e.respondWith(
        fetch(e.request)
            .then(response => {
                // Do not overwrite a good cached copy with an error page
                if(response.ok){
                    const copy = response.clone();

                    caches.open(dataCacheName)
                        .then(cache => cache.put(e.request.url, copy))
                        .catch(error => {
                            // e.g. partial (206) responses cannot be stored
                            console.log('[ServiceWorker] Cache write failed', error);
                        });
                }

                return response;
            })
            .catch(error => {
                // Network failure (offline): answer from any cache, which
                // includes the pages pre-cached during install
                return caches.match(e.request).then(cached => {
                    if(cached){
                        return cached;
                    }

                    throw error;
                });
            })
    );
});
import SitemapParser from './SitemapParser';
// Base URL of the built assets (placeholder value, replace it with the real
// path); OfflineHelper.fetchManifest prefixes every manifest entry with it
export const buildPath = '</url/to/assets>';
// Manifest files whose values are the asset paths to cache
export const assetManifestURL = `${buildPath}/manifest.json`;
export const webpackManifestURL = `${buildPath}/webpack/manifest.json`;
// Sitemap handed to SitemapParser to collect the page links to cache
export const sitemapURL = '/sitemap.xml';
export default class OfflineHelper {
    /**
     * Fetch a manifest.json file and return the URLs of all assets listed in it
     *
     * Every value of the manifest object is prefixed with `buildPath`. Any
     * failure (network error, non 2xx response, invalid JSON) is logged and
     * results in an empty array, so this never rejects.
     *
     * @param {string} manifestURL
     * @returns {Promise<string[]>}
     */
    static async fetchManifest(manifestURL){
        try {
            const response = await fetch(manifestURL);

            if(!response.ok){
                // Throw a real Error (not a string) so the log has a stack
                throw new Error(
                    `Failed to load: ${manifestURL}\r\n`+
                    `Network error with response: ${response.status}`
                );
            }

            const manifest = await response.json();

            return Object.values(manifest).map(value => `${buildPath}/${value}`);
        } catch(error) {
            // Return an empty array if it failed for some reason, and
            // log the error
            console.log(error);

            return [];
        }
    }

    /**
     * Fetch the assets, webpack assets, and the sitemap links in parallel
     *
     * Always resolves to a flat array (possibly empty) so the result can be
     * passed to cache.addAll() directly. A failing sitemap only drops the
     * sitemap links, the manifest assets are still returned.
     *
     * @returns {Promise<string[]>}
     */
    static async fetchAssetsToCache(){
        try {
            // Started inside a promise chain so that a synchronous throw is
            // handled the same way as a rejection
            const sitemapLinks = Promise.resolve()
                .then(() => new SitemapParser(sitemapURL).getLinks())
                .catch(error => {
                    console.log(error);

                    return [];
                });

            const groups = await Promise.all([
                this.fetchManifest(assetManifestURL),
                this.fetchManifest(webpackManifestURL),
                sitemapLinks
            ]);

            // Flatten the array of arrays
            return groups.reduce((stack, current) => stack.concat(current), []);
        } catch(error) {
            console.log(error);

            // Previously nothing was returned here, so callers received
            // undefined instead of a list
            return [];
        }
    }
}
import xmldom from 'xmldom';
export default class SitemapParser {
    /**
     * @param {string} sitemap URL of the sitemap (or sitemap index) to read
     */
    constructor(sitemap){
        this.sitemap = sitemap;
        this.links = [];
    }

    /**
     * Get the JSON version of the sitemap
     *
     * Resolves to false when the sitemap did not answer with a 200 or could
     * not be parsed. Rejects when the fetch itself fails.
     *
     * @return {Promise<object|boolean>}
     */
    async jsonSitemap(){
        const response = await fetch(this.sitemap);

        // Only continue if it's safe
        if(response.status !== 200){
            return false;
        }

        // parseFromString is synchronous, only reading the body is awaited
        const parsed = new xmldom.DOMParser()
            .parseFromString(await response.text(), 'text/xml');

        if(!parsed){
            return false;
        }

        return this.constructor.xmlToJson(parsed);
    }

    /**
     * Collect the text of the <loc> child of one or many converted entries
     *
     * xmlToJson only produces an array when an element name occurs more than
     * once, so a sitemap with a single entry yields a plain object. Both
     * shapes are accepted here. Entries without a usable <loc> are skipped
     * and surrounding whitespace is removed.
     *
     * @param {object|array} entries
     * @return {array}
     */
    static extractLocations(entries){
        if(entries === null || typeof entries !== 'object'){
            return [];
        }

        const list = Array.isArray(entries) ? entries : [entries];
        const locations = [];

        list.forEach(entry => {
            const loc = entry ? entry.loc : undefined;
            let text = '';

            if(typeof loc === 'string'){
                text = loc;
            } else if(loc && typeof loc === 'object'){
                // The location can be plain text and/or a CDATA section, and
                // either can be a list when the node was split up
                text = [].concat(loc['#text'] || [], loc['#cdata-section'] || [])
                    .filter(part => typeof part === 'string')
                    .join('');
            }

            text = text.trim();

            if(text !== ''){
                locations.push(text);
            }
        });

        return locations;
    }

    /**
     * Parse all the urls in a json urlset
     *
     * The urls are appended to this.links, which is also what is returned
     *
     * @param {object} jsonData
     * @return {array}
     */
    parseURLSet(jsonData){
        const urlset = jsonData ? jsonData.urlset : undefined;
        const entries = urlset ? urlset.url : undefined;

        // Pushed one by one, spreading a large list into push() can exceed
        // the maximum number of arguments
        this.constructor.extractLocations(entries).forEach(link => {
            this.links.push(link);
        });

        return this.links;
    }

    /**
     * Parse all the urls for sitemaps in a json sitemap index
     *
     * @param {object} jsonData
     * @return {array}
     */
    parseSitemapIndex(jsonData){
        const index = jsonData ? jsonData.sitemapindex : undefined;

        return this.constructor.extractLocations(index ? index.sitemap : undefined);
    }

    /**
     * Get all the links in all the sitemaps
     *
     * @return {Promise<array>}
     */
    async getLinks(){
        const xmlData = await this.jsonSitemap();

        // If the jsonSitemap function has not successfully run, return an empty
        // array
        if(!xmlData){
            return [];
        }

        // There can be a sitemap of sitemaps, wordpress's yoast seo does
        // this to cope with custom post types, so recursively get links
        if(
            typeof xmlData.sitemapindex !== 'undefined'
            &&
            typeof xmlData.sitemapindex.sitemap !== 'undefined'
        ){
            for(const sitemap of this.parseSitemapIndex(xmlData)){
                const links = await new SitemapParser(sitemap).getLinks();

                links.forEach(link => {
                    this.links.push(link);
                });
            }
        }

        // The urlset is what is wanted to get the links from, so if it exists
        // get them!
        if(
            typeof xmlData.urlset !== 'undefined'
            &&
            typeof xmlData.urlset.url !== 'undefined'
        ){
            this.parseURLSet(xmlData);
        }

        return this.links;
    }

    /**
     * Convert an xml node (and all its descendants) to a json object
     *
     * Elements become objects keyed by child node name, with attributes under
     * "@attributes". Text and CDATA nodes become strings. A child name that
     * occurs more than once becomes an array.
     *
     * @param {object} xml DOM node, e.g. the document returned by DOMParser
     * @return {object|string}
     */
    static xmlToJson(xml){
        let jsonData = {};

        if(xml.nodeType === 1){
            // Element node, copy its attributes
            if(xml.attributes.length > 0){
                jsonData["@attributes"] = {};

                for(let j = 0; j < xml.attributes.length; j++){
                    const attribute = xml.attributes.item(j);

                    jsonData["@attributes"][attribute.nodeName] = attribute.nodeValue;
                }
            }
        } else if(xml.nodeType === 3 || xml.nodeType === 4){
            // Text node or CDATA section
            jsonData = xml.nodeValue;
        }

        if(xml.hasChildNodes()){
            for(let i = 0; i < xml.childNodes.length; i++){
                const item = xml.childNodes.item(i);
                const nodeName = item.nodeName;
                const value = this.xmlToJson(item);

                // Own-property check: an element called e.g. <toString> must
                // not be mistaken for the method inherited from Object
                if(!Object.prototype.hasOwnProperty.call(jsonData, nodeName)){
                    jsonData[nodeName] = value;
                } else {
                    // Array.isArray rather than probing for .push, a converted
                    // element may itself have a <push> child
                    if(!Array.isArray(jsonData[nodeName])){
                        jsonData[nodeName] = [jsonData[nodeName]];
                    }

                    jsonData[nodeName].push(value);
                }
            }
        }

        return jsonData;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment