Created
May 26, 2016 16:56
-
-
Save jconwell/a2bae00bc29fc48e53ed6c680eabbcf0 to your computer and use it in GitHub Desktop.
Google Apps Script to mine a unique list of email addresses from Gmail
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Mines Gmail for every distinct email address that appears in the From, To,
 * Cc or Bcc header of messages matching the search query, then writes the
 * addresses (lower-cased, sorted by domain) into column A of the active sheet,
 * one address per row starting at row 1.
 *
 * Threads are fetched in pages of `pageSize`; paging stops as soon as a page
 * comes back short. Nothing is written when no address is found.
 */
function getEmails() {
  var query = 'after:2012/9/1';
  var pageSize = 500;

  // `seen` gives O(1) de-duplication across all pages; a prototype-less
  // object avoids collisions with inherited keys.
  var seen = Object.create(null);
  var allAddresses = [];

  var start = 0;
  var hasMore = true;
  while (hasMore) {
    var threads = GmailApp.search(query, start, pageSize);
    start += threads.length;
    // A short page means the search results are exhausted.
    hasMore = threads.length === pageSize;
    if (threads.length === 0) {
      break;
    }

    // One array of messages per thread.
    var messagesByThread = GmailApp.getMessagesForThreads(threads);
    messagesByThread.forEach(function (messages) {
      messages.forEach(function (message) {
        var headers = [
          message.getFrom(),
          message.getTo(),
          message.getCc(),
          message.getBcc()
        ];
        headers.forEach(function (header) {
          splitAddressList_(header).forEach(function (entry) {
            var address = extractAddress_(entry);
            if (address !== '' && !seen[address]) {
              seen[address] = true;
              allAddresses.push(address);
            }
          });
        });
      });
    });
  }

  // Sort by domain; fall back to the full address so the order within a
  // domain is deterministic regardless of sort stability.
  allAddresses.sort(function (a, b) {
    var byDomain = a.substring(a.lastIndexOf('@') + 1)
      .localeCompare(b.substring(b.lastIndexOf('@') + 1));
    if (byDomain !== 0) {
      return byDomain;
    }
    if (a < b) {
      return -1;
    }
    return a > b ? 1 : 0;
  });

  // setValues expects a 2D array: one single-cell row per address.
  var rows = allAddresses.map(function (address) {
    return [address];
  });

  // Skip the write for an empty result rather than requesting a zero-row range.
  if (rows.length === 0) {
    return;
  }
  SpreadsheetApp.getActiveSheet().getRange(1, 1, rows.length, 1).setValues(rows);
}

/**
 * Splits an address-list header into its individual entries.
 *
 * Commas inside a double-quoted display name (e.g. `"Jim, Savers" <j@x.org>`)
 * or inside an angle-bracketed address do not split the entry. A backslash
 * inside quotes escapes the next character. An unbalanced quote causes the
 * remainder of the header to be returned as a single entry.
 *
 * @param {string} header Raw header value; may be empty, null or undefined.
 * @return {string[]} The untrimmed entries, in header order.
 */
function splitAddressList_(header) {
  var entries = [];
  if (!header) {
    return entries;
  }

  var current = '';
  var inQuotes = false;
  var inAngle = false;
  for (var i = 0; i < header.length; i++) {
    var ch = header.charAt(i);

    if (ch === '\\' && inQuotes && i + 1 < header.length) {
      // Keep the escaped character verbatim so an escaped quote cannot
      // terminate the quoted section.
      current += ch + header.charAt(i + 1);
      i++;
      continue;
    }

    if (ch === '"') {
      inQuotes = !inQuotes;
    } else if (!inQuotes && ch === '<') {
      inAngle = true;
    } else if (!inQuotes && ch === '>') {
      inAngle = false;
    }

    if (ch === ',' && !inQuotes && !inAngle) {
      entries.push(current);
      current = '';
    } else {
      current += ch;
    }
  }
  entries.push(current);
  return entries;
}

/**
 * Extracts the bare, lower-cased email address from one address-list entry.
 *
 * For `Name <addr>` the text between the last `<` and the following `>` is
 * used (or everything after `<` if `>` is missing); otherwise the trimmed
 * entry itself is used.
 *
 * @param {string} entry A single entry such as `"Doe, Jane" <jane@x.org>`.
 * @return {string} The address, or '' when the result contains no `@`.
 */
function extractAddress_(entry) {
  var text = String(entry).trim();
  var open = text.lastIndexOf('<');
  if (open >= 0) {
    var close = text.indexOf('>', open + 1);
    text = close >= 0 ? text.substring(open + 1, close) : text.substring(open + 1);
  }
  text = text.trim().toLowerCase();
  return text.indexOf('@') >= 0 ? text : '';
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Failing test case: a "To:" header such as
"Jim, Savers" <[email protected]>
will be tokenized on the "," inside the quoted display name, so this single recipient is split into two broken entries.