Created
June 8, 2013 10:08
-
-
Save hoehrmann/5734742 to your computer and use it in GitHub Desktop.
User script ("Greasemonkey script") browser extension that adds checkboxes for selecting periods and an [mbox] button to pages like http://lists.w3.org/Archives/Public/www-talk/. Clicking the button retrieves the mails of the selected periods from the archive and turns them into a plain-text mbox file that can be saved to disk for import into mail software.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// ==UserScript==
// @name Generate mbox files for W3C list archives
// @description Generate mbox files for W3C list archives
// @version 0.9
// @author Bjoern Hoehrmann
// @license GPLv3+
// @include http://lists.w3.org/Archives/*
// @include https://lists.w3.org/Archives/*
// ==/UserScript==
(function(document) { | |
/**
 * Serialise one archived message page into a single mbox entry.
 *
 * Header values are scraped from the page with CSS selectors and the
 * message text is taken from the `#body` element.
 *
 * @param {Document} d - document of one archived message page.
 * @returns {string} a "From " separator line, the headers, an empty line,
 *   the body and two trailing CRLFs.
 * @throws {TypeError} when `d.baseURI` contains no
 *   "Archives/<section>/<list>" path, or when the page has no `#body`
 *   element.
 */
function single_mail_to_mbox_string(d) {
  // Text of the first element matching `selector`, with whitespace runs
  // collapsed to single spaces and trimmed; "" when nothing matches.
  var get = function(selector) {
    var node = d.querySelector(selector);
    if (!node) {
      return "";
    }
    return node.textContent.replace(/\s+/g, ' ').trim();
  };

  var list_match = d.baseURI.match(/Archives\/.*?\/([^/]+)/);
  if (!list_match) {
    throw new TypeError('Cannot determine list name from ' + d.baseURI);
  }
  var list_name = list_match[1];
  var request_address = list_name + '-request@w3.org';

  var headers = {
    'Subject': get('h1'),
    'Date': get('#date'),
    'To': get('#to'),
    'Message-Id': get('#message-id'),
    'From': get('#from'),
    'Archived-At': d.baseURI,
    'Content-Type': 'text/plain; charset=utf-8',
    'Content-Transfer-Encoding': '8bit',
    'Mime-Version': '1.0',
    'Resent-From': list_name + '@w3.org',
    'Resent-Sender': request_address,
    'Precedence': 'list',
    'List-Id': '<' + list_name + '.w3.org>',
    'List-Help': '<http://www.w3.org/Mail/>',
    'List-Post': '<mailto:' + list_name + '@w3.org>',
    'List-Unsubscribe': '<mailto:' + request_address +
      '?subject=unsubscribe>'
  };

  // The parent message id is read from an HTML comment in the page source.
  // It is assigned to innerHTML only to decode character references, so
  // values containing '<' are skipped to avoid parsing any markup.
  var reply = d.body.innerHTML.match(/<!-- inreplyto="(.*?)"/);
  if (reply && reply[1].indexOf('<') < 0) {
    var elem = d.createElement('p');
    elem.innerHTML = reply[1];
    var entities_decoded = elem.textContent;
    headers['In-Reply-To'] = '<' + entities_decoded + '>';
  }

  // The envelope sender is not scraped from the page; the list's -request
  // address is used as a stand-in on the separator line.
  var mbox = "From " + request_address + "\r\n";
  Object.keys(headers).forEach(function(key) {
    mbox += key + ': ' + headers[key] + "\r\n";
  });
  mbox += "\r\n";

  mbox += d
    .querySelector('#body')
    .textContent
    .replace(/\r?\n/g, "\r\n")
    .replace(/^\r?\n/, '')
    .replace(/\s+$/, '')
    // mboxrd-style quoting: body lines that look like a separator
    // (optionally already quoted) get one more leading '>'.
    .replace(/^(>*From )/mg, ">$1");

  mbox += "\r\n";
  mbox += "\r\n";
  return mbox;
}
/**
 * Offer the generated mbox text to the user as a file download.
 *
 * The text is wrapped in a Blob and handed to the browser through a
 * temporary, hidden `<a download>` element. Navigating the page itself to
 * a `data:` URL is not used because current browsers block top-level
 * navigations to `data:` URLs.
 *
 * @param {Document} d - document used to create the temporary link.
 * @param {string} mbox - complete mbox text to save.
 */
function offer_mbox_for_download(d, mbox) {
  var blob = new Blob([mbox], { type: 'application/octet-stream' });
  var address = URL.createObjectURL(blob);

  var link = d.createElement('a');
  link.href = address;
  link.download = 'archive.mbox';
  link.style.display = 'none';

  d.body.appendChild(link);
  link.click();
  d.body.removeChild(link);

  // Release the blob only after the click has been dispatched.
  setTimeout(function() {
    URL.revokeObjectURL(address);
  }, 0);
}
/**
 * Load an HTML page into a hidden iframe and pass its document to a
 * callback.
 *
 * The iframe is appended to `ownerDocument.body`, and is removed again as
 * soon as the callback has run, including when the callback throws.
 *
 * @param {Document} ownerDocument - document that hosts the temporary iframe.
 * @param {string} address - URL of the page to load.
 * @param {function(Document)} onload - called with the iframe's
 *   `contentDocument` when the iframe fires `load`.
 */
function load_html_document(ownerDocument, address, onload) {
  var loader = ownerDocument.createElement('iframe');

  loader.addEventListener('load', function() {
    try {
      onload(loader.contentDocument);
    } finally {
      // Always detach the iframe so a failing callback does not leave
      // hidden frames behind in the host page.
      if (loader.parentNode) {
        loader.parentNode.removeChild(loader);
      }
    }
  }, false);

  loader.src = address;
  loader.style.display = 'none';
  ownerDocument.body.appendChild(loader);
}
/**
 * Fetch every message linked from an index page and concatenate the
 * resulting mbox entries.
 *
 * Messages are loaded one at a time. Because pending loads are taken from
 * the end of the list, entries appear in the reverse of the link order.
 * A message that cannot be converted is logged and skipped.
 *
 * @param {Document} d - document of a message index page.
 * @param {function(string)} onload - called once with the combined mbox
 *   text; receives "" when the page links to no messages.
 */
function message_list_to_mbox(d, onload) {
  var elems = d.querySelectorAll('.messages-list li a[href$=".html"]');
  var whole_mbox = "";
  var pending;

  // Start the next queued load, or report the result once none are left.
  // Calling this up front also covers an index page without messages.
  var next = function() {
    if (pending.length > 0) {
      (pending.pop())();
    } else {
      onload(whole_mbox);
    }
  };

  pending = [].map.call(elems, function(a) {
    return function() {
      load_html_document(document, a.href, function(loaded_document) {
        try {
          whole_mbox += single_mail_to_mbox_string(loaded_document);
        } catch (e) {
          console.log(e);
        }
        next();
      });
    };
  });

  next();
}
/**
 * Click handler of the [mbox] button.
 *
 * For every ticked `.mbox` checkbox, the index page linked from the first
 * anchor of the checkbox's row is loaded and converted; the combined
 * result is then offered for download. Periods are processed one at a
 * time, starting with the last ticked checkbox. Nothing happens when no
 * checkbox is ticked.
 *
 * @param {Event} evt - the click event (not used).
 */
function mbox_button_onclick(evt) {
  var real_mbox = "";
  var pending = [].map.call(document.querySelectorAll('.mbox:checked'), function(input) {
    return function() {
      var index_address = input.parentNode.parentNode
        .querySelector('a').href;
      load_html_document(document, index_address, function(loaded_document) {
        message_list_to_mbox(loaded_document, function(mbox) {
          real_mbox += mbox;
          if (pending.length) {
            (pending.pop())();
          } else {
            offer_mbox_for_download(document, real_mbox);
          }
        });
      });
    };
  });

  // No period selected: there is nothing to fetch or to download.
  if (pending.length === 0) {
    return;
  }
  (pending.pop())();
}
/**
 * Append one extra cell to every row of the first table on the page:
 * rows whose text contains "period" get the [mbox] button, all other rows
 * get a centred `.mbox` checkbox. Does nothing when the page has no table.
 */
function add_mbox_controls() {
  var table = document.querySelector('table');
  if (!table) {
    return;
  }
  [].forEach.call(table.rows, function(row) {
    var cell = row.insertCell(-1);
    if (row.textContent.match(/period/)) {
      cell.innerHTML = "<input type='submit' value='mbox'>";
      cell.firstChild.addEventListener('click', mbox_button_onclick, false);
    } else {
      cell.innerHTML = "<input type='checkbox' class='mbox'>";
      cell.align = 'center';
    }
  });
}

// Only act on addresses that end in "Archives/<segment>/<segment>/".
if (location.href.match(/Archives[/][^/]+[/][^/]+[/]$/)) {
  if (document.readyState === 'loading') {
    document.addEventListener("DOMContentLoaded", add_mbox_controls, false);
  } else {
    // The script was injected after parsing finished, so DOMContentLoaded
    // has already fired and a listener would never run.
    add_mbox_controls();
  }
}
})(document); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment