Skip to content

Instantly share code, notes, and snippets.

@huned
Created January 12, 2012 00:00
Show Gist options
  • Select an option

  • Save huned/1597577 to your computer and use it in GitHub Desktop.

Select an option

Save huned/1597577 to your computer and use it in GitHub Desktop.
/**
 * Extract the unique email addresses that appear in a free-form string
 * (for example a raw "From"/"To"/"Cc" header value).
 *
 * Addresses are lowercased before comparison, so "Bob@X.com" and
 * "bob@x.com" collapse into a single entry.
 *
 * @param {string} s - text to scan; null/undefined is treated as empty.
 * @returns {string[]} unique, lowercased addresses in ascending sort order.
 */
function parseEmailAddressesFromString(s) {
  if (s == null) return [];

  // The domain is matched label by label (labels joined by single dots) so
  // that trailing sentence punctuation, as in "mail john@example.com.", is
  // not swallowed into the address. The input is lowercased first, so no
  // case-insensitive flag is needed; the global flag makes match() return
  // every occurrence.
  var pattern = /[a-z0-9._%+-]+@[a-z0-9-]+(?:\.[a-z0-9-]+)*/g;
  var found = String(s).toLowerCase().match(pattern) || [];

  // Sort first, then drop adjacent duplicates.
  found.sort();
  return found.filter(function(address, i) {
    return i === 0 || address !== found[i - 1];
  });
}
/**
 * Collect the unique domains (the text after the last "@") from a list of
 * email addresses.
 *
 * Entries that are not strings, contain no "@", or have nothing after the
 * "@" are ignored rather than producing a bogus domain.
 *
 * @param {string[]} email_addresses - addresses to inspect; null/undefined
 *   is treated as an empty list.
 * @returns {string[]} unique, lowercased domains in ascending sort order.
 */
function parseDomainsFromEmailAddresses(email_addresses) {
  var domains = [];

  (email_addresses || []).forEach(function(email_address) {
    if (typeof email_address !== 'string') return;

    // Use the LAST "@": anything before it belongs to the local part, even
    // if that part itself contains an "@".
    var at = email_address.lastIndexOf('@');
    if (at < 0) return;

    var domain = email_address.slice(at + 1).toLowerCase();
    if (domain !== '') domains.push(domain);
  });

  // Sort first, then drop adjacent duplicates.
  domains.sort();
  return domains.filter(function(domain, i) {
    return i === 0 || domain !== domains[i - 1];
  });
}
// Backfill pass over db.emails: for every email that has no `from_addr`
// field yet, derive normalized addresses and domains from the raw
// from/to/cc values, look up matching Person/Company contacts, and store
// the derived fields back on the email document.
//
// The bare `new Date()` expressions bracket the run; presumably they are
// there so an interactive shell echoes a start and end timestamp.
new Date();
db.emails.find({from_addr: {$exists: false}}, {from: 1, to: 1, cc: 1}).
  limit(1000000).forEach(function(e) {
    // unique, normalized email address strings
    var from_addr = parseEmailAddressesFromString(e.from || '')[0];
    var to_addrs = parseEmailAddressesFromString(e.to || '');
    var cc_addrs = parseEmailAddressesFromString(e.cc || '');

    // skip pathological cases: no sender, or no recipients at all.
    // (Arrays must be tested by length; `arr == []` is never true.)
    if (!from_addr || (to_addrs.length === 0 && cc_addrs.length === 0)) return;

    // unique, normalized domain strings
    var from_domain = parseDomainsFromEmailAddresses([from_addr])[0];
    var to_domains = parseDomainsFromEmailAddresses(to_addrs);
    var cc_domains = parseDomainsFromEmailAddresses(cc_addrs);

    // contact ids
    var from_person_id = null;
    var from_company_id = null;
    var to_person_ids = [];
    var to_company_ids = [];
    var cc_person_ids = [];
    var cc_company_ids = [];

    // True when at least one entry of `candidates` appears in `wanted`.
    var anyIn = function(candidates, wanted) {
      return candidates.some(function(candidate) {
        return wanted.indexOf(candidate) >= 0;
      });
    };

    db.contacts.find({ $or: [
      // NOTE(review): the query looks for addresses with '.sequoiacap.com'
      // appended, while the matching below compares the stored addresses
      // against the un-suffixed ones. Confirm how contact addresses are
      // stored; as written, a Person only matches if it carries both forms.
      {_type: 'Person', 'email_addresses.address': {
        $in: to_addrs.concat(cc_addrs).concat(from_addr).map(function(s) {
          return s + '.sequoiacap.com';
        })
      }},
      {_type: 'Company', 'domain_aliases': {
        $in: to_domains.concat(cc_domains).concat(from_domain)
      }}
    ]}, {
      // `_type` and `domain_aliases` must be projected too; the code below
      // reads both of them.
      _id: 1,
      _type: 1,
      'email_addresses.address': 1,
      domain_aliases: 1
    }).forEach(function(contact) {
      // The to/cc/from roles are checked independently: one contact can be
      // both sender and recipient (e.g. mail within a single company), and
      // each id is recorded at most once per role.
      switch (contact._type) {
        case 'Person':
          var addresses = (contact.email_addresses || []).map(function(addr) {
            return (addr.address || '').toLowerCase().trim();
          });
          if (anyIn(addresses, to_addrs)) to_person_ids.push(contact._id);
          if (anyIn(addresses, cc_addrs)) cc_person_ids.push(contact._id);
          if (addresses.indexOf(from_addr) >= 0) from_person_id = contact._id;
          break;
        case 'Company':
          var domains = (contact.domain_aliases || []).map(function(alias) {
            return alias.toLowerCase().trim();
          });
          if (anyIn(domains, to_domains)) to_company_ids.push(contact._id);
          if (anyIn(domains, cc_domains)) cc_company_ids.push(contact._id);
          if (domains.indexOf(from_domain) >= 0) from_company_id = contact._id;
          break;
      }
    });

    db.emails.update({_id: e._id}, { $set: {
      from_addr: from_addr,
      from_domain: from_domain,
      from_person_id: from_person_id,
      from_company_id: from_company_id,
      to_addrs: to_addrs,
      to_domains: to_domains,
      to_person_ids: to_person_ids,
      to_company_ids: to_company_ids,
      cc_addrs: cc_addrs,
      cc_domains: cc_domains,
      cc_person_ids: cc_person_ids,
      cc_company_ids: cc_company_ids
    }});
  });
new Date();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment