Created
January 12, 2012 00:00
-
-
Save huned/1597577 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Extract the unique e-mail addresses that appear in a free-form string
 * (for example a raw "From", "To" or "Cc" header value).
 *
 * The input is lower-cased before matching, so the returned addresses are
 * lower-case and duplicates that differ only by case collapse into one entry.
 *
 * @param {string} s - Text to scan. `null`/`undefined` are treated as ''.
 * @returns {string[]} Unique addresses, sorted with the default string sort.
 */
function parseEmailAddressesFromString(s) {
  // The domain must start and end with an alphanumeric character, so
  // punctuation that merely follows an address in running text
  // ("... bob@example.com.") is not captured as part of the address.
  var pattern = /[a-z0-9._%+-]+@[a-z0-9]+(?:[.-]+[a-z0-9]+)*/g;
  var text = String(s == null ? '' : s).toLowerCase();
  var matches = text.match(pattern) || [];

  // Every match contains '@', so it cannot collide with Object.prototype keys.
  var seen = {};
  matches.forEach(function(address) {
    seen[address] = true;
  });

  return Object.keys(seen).sort();
}
/**
 * Collect the unique domains of a list of e-mail addresses.
 *
 * The domain is everything after the LAST '@' in an address, trimmed and
 * lower-cased. Entries without an '@', or with nothing after it, are ignored
 * rather than contributing a bogus domain.
 *
 * @param {string[]} email_addresses - Addresses to inspect. A nullish list is
 *   treated as empty; nullish entries are skipped.
 * @returns {string[]} Unique domains, sorted with the default string sort.
 */
function parseDomainsFromEmailAddresses(email_addresses) {
  // Prototype-less object: a domain is arbitrary text and must not be able to
  // collide with inherited keys such as "__proto__".
  var seen = Object.create(null);

  (email_addresses || []).forEach(function(email_address) {
    var text = String(email_address == null ? '' : email_address);
    var at = text.lastIndexOf('@');
    if (at < 0) return;

    var domain = text.slice(at + 1).trim().toLowerCase();
    if (domain) seen[domain] = true;
  });

  return Object.keys(seen).sort();
}
// Backfill script: for every e-mail document that has no `from_addr` yet,
// derive normalized address/domain lists from its from/to/cc fields, look up
// the matching Person and Company contacts, and store the results on the
// e-mail document.
//
// The bare `new Date()` expressions before and after the run are kept from the
// original; presumably they echo start/end timestamps when the script is
// pasted into an interactive shell.
new Date();
db.emails.find({from_addr: {$exists: false}}, {from: 1, to: 1, cc: 1}).
  limit(1000000).forEach(function(e) {
    // unique, normalized email address strings
    var from_addr = parseEmailAddressesFromString(e.from || '')[0]
      , to_addrs = parseEmailAddressesFromString(e.to || '')
      , cc_addrs = parseEmailAddressesFromString(e.cc || '');

    // skip pathological cases: no sender, or no recipient at all.
    // (Arrays must be tested by length; `arr == []` is never true.)
    if (!from_addr || (to_addrs.length === 0 && cc_addrs.length === 0)) return;

    // unique, normalized domain strings
    var from_domain = parseDomainsFromEmailAddresses([from_addr])[0]
      , to_domains = parseDomainsFromEmailAddresses(to_addrs)
      , cc_domains = parseDomainsFromEmailAddresses(cc_addrs);

    // contact ids
    var from_person_id = null
      , from_company_id = null
      , to_person_ids = []
      , to_company_ids = []
      , cc_person_ids = []
      , cc_company_ids = [];

    db.contacts.find({ $or: [
      {_type: 'Person', 'email_addresses.address': {
        // NOTE(review): the query looks addresses up with a '.sequoiacap.com'
        // suffix, while the matching below compares the stored addresses
        // against the un-suffixed ones. Kept exactly as in the original;
        // confirm against the contacts data.
        $in: to_addrs.concat(cc_addrs).concat(from_addr).map(function(s) {
          return s + '.sequoiacap.com';
        })
      }},
      {_type: 'Company', 'domain_aliases': {
        $in: to_domains.concat(cc_domains).concat(from_domain)
      }}
    ]}, {
      // `_type` and `domain_aliases` are read by the callback below, so they
      // must be part of the projection or they come back undefined.
      _id: 1,
      _type: 1,
      'email_addresses.address': 1,
      domain_aliases: 1
    }).forEach(function(contact) {
      var i;
      switch (contact._type) {
        case 'Person':
          var addresses = (contact.email_addresses || []).map(function(addr) {
            return String(addr.address || '').toLowerCase().trim();
          });
          // The first matching address decides the role: to, then cc, then from.
          for (i = 0; i < addresses.length; i++) {
            var address = addresses[i];
            if (to_addrs.indexOf(address) >= 0) {
              to_person_ids.push(contact._id);
              break;
            } else if (cc_addrs.indexOf(address) >= 0) {
              cc_person_ids.push(contact._id);
              break;
            } else if (address === from_addr) {
              from_person_id = contact._id;
              break;
            }
          }
          break;
        case 'Company':
          var aliases = contact.domain_aliases || [];
          // The first matching alias decides the role: to, then cc, then from.
          for (i = 0; i < aliases.length; i++) {
            var domain = String(aliases[i]).toLowerCase().trim();
            // indexOf returns -1 when absent, so compare explicitly.
            if (to_domains.indexOf(domain) >= 0) {
              to_company_ids.push(contact._id);
              break;
            } else if (cc_domains.indexOf(domain) >= 0) {
              cc_company_ids.push(contact._id);
              break;
            } else if (domain === from_domain) {
              from_company_id = contact._id;
              break;
            }
          }
          break;
      }
    });

    db.emails.update({_id: e._id}, { $set: {
      from_addr: from_addr,
      from_domain: from_domain,
      from_person_id: from_person_id,
      from_company_id: from_company_id,
      to_addrs: to_addrs,
      to_domains: to_domains,
      to_person_ids: to_person_ids,
      to_company_ids: to_company_ids,
      cc_addrs: cc_addrs,
      cc_domains: cc_domains,
      cc_person_ids: cc_person_ids,
      cc_company_ids: cc_company_ids
    }});
  });
new Date();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment