Last active
August 29, 2015 13:57
-
-
Save sarahbkim/9861854 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var google = require('google'); | |
var request = require('request'); | |
var cheerio = require('cheerio'); | |
var fs = require('fs'); | |
/*
 * SOCIAL MEDIA WEBSITES REGEX
 *
 * Maps a short network key to a pattern that recognises a link to that
 * network. The dots are escaped so they match a literal "." only; an
 * unescaped "." matches any character and would accept look-alike strings
 * such as "twitter-com" or "facebookxcom".
 */
var socialURLS = {
  'fb': /\bfacebook\.com\b/,
  'tw': /\btwitter\.com\b/,
  'gplus': /\bplus\.google\.com\b/,
  'ig': /\binstagram\.com\b/
};
/* commented out for now... | |
var contacts = []; | |
/* SCRAPE GOOGLE SEARCH RESULTS | |
google.resultsPerPage = 25; | |
var nextCounter = 0; | |
function search(q) { | |
google.resultsPerPage = 25; | |
var nextCounter = 0; | |
google(q, function(err, next, links){ | |
if (err) console.error(err); | |
for (var i = 0; i < links.length; ++i) { | |
contacts.push( { "title":links[i].title, "url": links[i].href } ); | |
} | |
if (nextCounter < 4) { | |
nextCounter += 1; | |
if (next) next(); | |
} | |
console.log(contacts); | |
return contacts; | |
}); | |
} | |
search('healthy breakfast ideas'); | |
*/ | |
// Canned search results used as the scraper's input. Each entry is written
// as a [title, url] pair and expanded into a { title, url } record below.
var contacts = [
  ['Healthy breakfast: Quick, flexible options to grab at home - Mayo Clinic',
    'http://www.mayoclinic.org/healthy-living/nutrition-and-healthy-eating/in-depth/food-and-nutrition/art-20048294'],
  ['Healthy Breakfast Ideas from Dr. Weil\'s Facebook Readers',
    'http://www.drweil.com/drw/u/ART03113/Healthy-Breakfast-Ideas-from-Facebook-Readers.html'],
  ['8 Healthy Breakfast Ideas - Prevention.com',
    'http://www.prevention.com/food/healthy-eating-tips/8-healthy-breakfast-ideas'],
  ['Quick & Easy Healthy Breakfast Ideas! - YouTube',
    'http://www.youtube.com/watch?v=mD4YSD8LDiQ'],
  ['The Best Foods to Eat for Breakfast - Health.com',
    'http://www.health.com/health/gallery/0,,20676415,00.html'],
  ['Healthy Breakfast Recipes - Secrets To Cooking Healthier - YouTube',
    'http://www.youtube.com/watch?v=7jH0xe1XKxI'],
  ['Healthy Breakfast Ideas You Can Make the Night Before - FitSugar',
    'http://www.fitsugar.com/Healthy-Breakfast-Ideas-You-Can-Make-Night-Before-20048633'],
  ['10 Easy, 5-Minute Breakfast Ideas - Diet and ... - Everyday Health',
    'http://www.everydayhealth.com/diet-and-nutrition-pictures/easy-5-minute-breakfast-ideas.aspx'],
  ['Healthy Breakfast Ideas for Kids | Parenting - Parenting.com',
    'http://www.parenting.com/gallery/healthy-breakfast-ideas-kids'],
  ['Fruits & Veggies More Matters : Healthy Breakfast Ideas : Health ...',
    'http://www.fruitsandveggiesmorematters.org/healthy-breakfast-ideas']
].map(function (pair) {
  return { title: pair[0], url: pair[1] };
});

// `scraper` is a function declaration further down the file, so it is
// already callable at this point thanks to hoisting.
scraper(contacts);
/**
 * Loops through the contacts list and scrapes each contact's homepage for
 * links to the social networks described by `socialURLS`.
 *
 * Every contact is mutated in place:
 *   - `domain`: scheme + host taken from the contact's `url`
 *   - `social`: unique hrefs found on the homepage that match a social pattern
 *
 * The requests are asynchronous, so the populated list is only logged (and
 * handed to `done`) once every request has called back. Failed requests and
 * non-200 responses are logged and leave that contact's `social` empty.
 *
 * @param {Array<{title: string, url: string}>} contacts - records to enrich.
 * @param {function(Array)=} done - optional; called with `contacts` once all
 *     homepages have been processed.
 */
function scraper(contacts, done) {
  var pending = contacts.length;

  // Reports the final result once nothing is left in flight.
  function finish() {
    console.log(contacts);
    if (typeof done === 'function') {
      done(contacts);
    }
  }

  // Marks one homepage as handled; the last one triggers the report.
  function settleOne() {
    pending -= 1;
    if (pending === 0) {
      finish();
    }
  }

  // Scrapes one contact's homepage for social links. The contact is passed
  // in explicitly so each callback writes to its own `social` array rather
  // than to a loop variable that has moved on by the time it runs.
  function socialScrape(contact) {
    request(contact.domain, function (err, resp, html) {
      if (err || !resp || resp.statusCode !== 200) {
        console.log(err || ('HTTP ' + (resp && resp.statusCode) + ' from ' + contact.domain));
        settleOne();
        return;
      }

      // Only parse once we know there is a successful response body.
      var $ = cheerio.load(html);
      $('a').each(function (index, el) {
        var href = $(el).attr('href');
        if (typeof href !== 'string') {
          return; // anchor without an href: nothing to test
        }
        for (var key in socialURLS) {
          if (socialURLS[key].test(href) && contact.social.indexOf(href) < 0) {
            contact.social.push(href);
          }
        }
      });
      settleOne();
    });
  }

  if (pending === 0) {
    finish();
    return;
  }

  // Adds the domain of each contact ("http://host" from "http://host/path")
  // and gives every contact its own result list before any request starts.
  for (var i = 0; i < contacts.length; i++) {
    contacts[i].domain = contacts[i].url.split('/', 3).join('/');
    contacts[i].social = [];
  }

  for (var j = 0; j < contacts.length; j++) {
    socialScrape(contacts[j]);
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment