Skip to content

Instantly share code, notes, and snippets.

@hoehrmann
Created June 8, 2013 10:08
Show Gist options
  • Save hoehrmann/5734742 to your computer and use it in GitHub Desktop.
Save hoehrmann/5734742 to your computer and use it in GitHub Desktop.
User script ("Greasemonkey script") browser extension: on pages like http://lists.w3.org/Archives/Public/www-talk/ it shows checkboxes to select periods and an [mbox] button that, when clicked, retrieves the mails of the selected periods from the archive and turns them into plain-text mbox files that can be saved to disk for import into ma…
// ==UserScript==
// @name Generate mbox files for W3C list archives
// @description Generate mbox files for W3C list archives
// @version 0.9
// @author Bjoern Hoehrmann
// @license GPLv3+
// @include http://lists.w3.org/Archives/*
// @include https://lists.w3.org/Archives/*
// ==/UserScript==
(function(document) {
/**
 * Convert one archived-message page into a single mbox entry.
 *
 * Header values are scraped from the page (`h1`, `#date`, `#to`,
 * `#message-id`, `#from`); list-related headers are synthesised from the
 * list name found in the page address. The message text is taken from the
 * `#body` element.
 *
 * @param {Document} d - document of one archived message.
 * @returns {string} the mbox entry: separator line, headers, blank line,
 *   body, and two trailing CRLFs.
 * @throws {TypeError} when the page address does not contain an
 *   `Archives/<section>/<list>` path or the page has no `#body` element.
 */
function single_mail_to_mbox_string(d) {
  // Text of the first element matching `selector`, with every run of
  // whitespace collapsed to one space so the value stays on a single header
  // line; "" when the element is missing.
  var get = function(selector) {
    try {
      return d.querySelector(selector)
        .textContent.replace(/\s+/g, ' ');
    } catch(e) {
      return "";
    }
  };

  // Third path segment counted from "Archives", e.g. "www-talk" in
  // ".../Archives/Public/www-talk/...".
  var list_name = d.baseURI.match(/Archives\/.*?\/([^/]+)/)[1];

  // NOTE(review): the "-request@w3.org" suffix replaces an e-mail-obfuscation
  // placeholder ("[email protected]") that had overwritten the literal; it
  // is assumed to be the list's administrative address - confirm.
  var request_address = list_name + '-request@w3.org';

  var headers = {
    'Subject': get('h1'),
    'Date': get('#date'),
    'To': get('#to'),
    'Message-Id': get('#message-id'),
    'From': get('#from'),
    'Archived-At': d.baseURI,
    'Content-Type': 'text/plain; charset=utf-8',
    'Content-Transfer-Encoding': '8bit',
    'Mime-Version': '1.0',
    'Resent-From': list_name + '@w3.org',
    'Resent-Sender': request_address,
    'Precedence': 'list',
    'List-Id': '<' + list_name + '.w3.org>',
    'List-Help': '<http://www.w3.org/Mail/>',
    'List-Post': '<mailto:' + list_name + '@w3.org>',
    'List-Unsubscribe': '<mailto:' + request_address +
      '?subject=unsubscribe>',
  };

  // The parent message id is only available inside an HTML comment, so it
  // is dug out of the serialised markup. Values containing '<' are skipped
  // so that no markup is ever parsed by the innerHTML assignment below.
  var reply = d.body.innerHTML.match(/<!-- inreplyto="(.*?)"/);
  if (reply && reply[1].indexOf('<') < 0) {
    // Round-trip through a detached element to decode character entities.
    var elem = d.createElement('p');
    elem.innerHTML = reply[1];
    var entities_decoded = elem.textContent;
    headers['In-Reply-To'] = '<' + entities_decoded + '>';
  }

  // NOTE(review): the address in this separator line was also replaced by
  // the obfuscation placeholder and cannot be reconstructed from this file;
  // the literal is kept as found.
  var mbox = "From [email protected]\r\n";
  for (var key in headers) {
    mbox += key + ':' + headers[key] + "\r\n";
  }
  mbox += "\r\n";
  mbox += d
    .querySelector('#body')
    .textContent
    .replace(/\r?\n/g, "\r\n")       // normalise line endings to CRLF
    .replace(/^\r?\n/, '')           // drop one leading empty line
    .replace(/\s+$/, '')             // drop trailing whitespace
    .replace(/^(>*From )/mg, ">$1")  // quote lines that look like separators
  ;
  mbox += "\r\n";
  mbox += "\r\n";
  return mbox;
}
/**
 * Offer the finished mbox text to the user as a file download.
 *
 * The text is wrapped in a Blob and "clicked" through a temporary
 * `<a download>` link. Navigating the page itself to a `data:` URL, as was
 * done before, is blocked as a top-level navigation by current browsers;
 * that path is kept only as a fallback for environments without Blob
 * support or without a usable document.
 *
 * @param {Document} d - document used to create the temporary link.
 * @param {string} mbox - complete mbox text.
 */
function offer_mbox_for_download(d, mbox) {
  var can_use_blob = typeof Blob === 'function' &&
    typeof URL !== 'undefined' &&
    typeof URL.createObjectURL === 'function' &&
    d && typeof d.createElement === 'function' && d.body;

  if (!can_use_blob) {
    location.href = 'data:application/octet-stream,' +
      encodeURIComponent(mbox);
    return;
  }

  var blob = new Blob([mbox], { type: 'application/octet-stream' });
  var address = URL.createObjectURL(blob);

  var link = d.createElement('a');
  link.href = address;
  link.download = 'archive.mbox';
  link.style.display = 'none';
  // Some browsers only honour click() on links attached to the document.
  d.body.appendChild(link);
  link.click();
  d.body.removeChild(link);

  // Release the blob later rather than immediately, so the browser has
  // time to start reading it for the download.
  setTimeout(function() {
    URL.revokeObjectURL(address);
  }, 60000);
}
/**
 * Load `address` in a hidden iframe and pass the loaded document to
 * `onload`.
 *
 * The iframe is appended to `ownerDocument.body`, and removed again once
 * `onload` has returned - or thrown, so a failing callback does not leave
 * the iframe behind.
 *
 * @param {Document} ownerDocument - document that hosts the temporary iframe.
 * @param {string} address - URL of the page to load.
 * @param {function(Document)} onload - called with the iframe's
 *   `contentDocument` while the iframe is still attached.
 */
function load_html_document(ownerDocument, address, onload) {
  var loader = ownerDocument.createElement('iframe');
  loader.addEventListener('load', function(evt) {
    var frame = evt.currentTarget;
    try {
      onload(frame.contentDocument);
    } finally {
      // The callback may already have detached the frame itself.
      if (frame.parentNode) {
        frame.parentNode.removeChild(frame);
      }
    }
  }, false);
  loader.src = address;
  loader.style.display = 'none';
  ownerDocument.body.appendChild(loader);
}
/**
 * Fetch every message linked from a message-list page and concatenate the
 * resulting mbox entries.
 *
 * Messages are loaded strictly one after another, starting with the last
 * link on the page. A message that cannot be converted is logged and
 * skipped. When the page links to no messages at all, `onload` is called
 * with an empty string.
 *
 * @param {Document} d - document of a message-list (index) page.
 * @param {function(string)} onload - called once with the combined mbox text.
 */
function message_list_to_mbox(d, onload) {
  var elems = d.querySelectorAll('.messages-list li a[href$=".html"]');
  var whole_mbox = "";

  // Snapshot the addresses now: `d` usually lives in a temporary iframe that
  // may be gone by the time the later messages are requested.
  var pending = [].map.call(elems, function(a) {
    return a.href;
  });

  function load_next() {
    if (pending.length === 0) {
      onload(whole_mbox);
      return;
    }
    load_html_document(document, pending.pop(), function(loaded_document) {
      try {
        whole_mbox += single_mail_to_mbox_string(loaded_document);
      } catch(e) {
        // Best effort: one bad message must not abort the whole export.
        console.log(e);
      }
      load_next();
    });
  }

  load_next();
}
/**
 * Click handler of the [mbox] button: export every checked period.
 *
 * For each checked `.mbox` checkbox, the first link in the same table row
 * is loaded as the period's index page, and its messages are converted to
 * mbox text. Periods are processed one after another, starting with the
 * last checked one, and the combined text is offered for download at the
 * end. Nothing happens when no period is checked.
 *
 * @param {Event} evt - the click event (unused).
 */
function mbox_button_onclick(evt) {
  var real_mbox = "";
  var pending = [].slice.call(document.querySelectorAll('.mbox:checked'));

  // Without a selection there is nothing to fetch and nothing to download.
  if (pending.length === 0) {
    return;
  }

  function load_next() {
    if (pending.length === 0) {
      offer_mbox_for_download(document, real_mbox);
      return;
    }
    var input = pending.pop();
    // checkbox -> cell -> row; the row's first link is the period's index.
    var index_address = input.parentNode.parentNode
      .querySelector('a').href;
    load_html_document(document, index_address, function(loaded_document) {
      message_list_to_mbox(loaded_document, function(mbox) {
        real_mbox += mbox;
        load_next();
      });
    });
  }

  load_next();
}
/**
 * Add the export controls to the first table of the page: an [mbox] button
 * in the row whose text contains "period", and a checkbox in every other
 * row. Does nothing when the page has no table.
 */
function add_mbox_controls() {
  var table = document.querySelector('table');
  if (!table) {
    return;
  }
  [].forEach.call(table.rows, function(row) {
    var cell = row.insertCell(-1);
    if (row.textContent.match(/period/)) {
      cell.innerHTML = "<input type='submit' value='mbox'>";
      cell.firstChild.addEventListener('click', mbox_button_onclick, false);
    } else {
      cell.innerHTML = "<input type='checkbox' class='mbox'>";
      cell.align = 'center';
    }
  });
}

// Only act on addresses ending in "Archives/<section>/<list>/".
if (location.href.match(/Archives[/][^/]+[/][^/]+[/]$/)) {
  // The script may be injected after DOMContentLoaded has already fired, in
  // which case a listener for it would never run; so only wait for the
  // event while the document is still being parsed.
  if (document.readyState === 'loading') {
    document.addEventListener("DOMContentLoaded", add_mbox_controls, false);
  } else {
    add_mbox_controls();
  }
}
})(document);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment