Skip to content

Instantly share code, notes, and snippets.

@twolfson
Last active September 17, 2021 08:23
Show Gist options
  • Save twolfson/5803105 to your computer and use it in GitHub Desktop.
Save twolfson/5803105 to your computer and use it in GitHub Desktop.
Scrape all GitHub watchers, stargazers, and followers
node_modules/
fixtures/
output/
config.json
// Load in module dependencies
var fs = require('fs'),
assert = require('assert'),
_ = require('underscore'),
config = require('./config');
// Aggregate script: merges the per-source email maps under `output/` into a
// single newline-separated list at `output/aggregate.txt`.

// Generate an output directory
try {
  fs.mkdirSync('output/');
} catch (e) {
  // An already-existing directory is the expected case on re-runs; any other
  // failure would make the write below fail, so surface it here instead
  if (e.code !== 'EEXIST') { throw e; }
}

// Read in our files
// DEV: Each file maps a username key to an email; later files win on duplicate keys
var userObj = _.extend(
  {},
  require('./output/followers.emails.json'),
  require('./output/stargazers.emails.json'),
  require('./output/watchers.emails.json'),
  require('./output/found.emails.json'));

// Collect emails into an array, skipping usernames listed in `config.usernameFilters`
var emails = [];
_.each(userObj, function (email, username) {
  if (config.usernameFilters.indexOf(username) === -1) {
    emails.push(email);
  }
});

// Write out the emails to a file (one per line)
fs.writeFileSync('output/aggregate.txt', emails.join('\n'));

// Notify myself
console.log('Wrote out emails from "output/followers.emails.json", "output/stargazers.emails.json", "output/watchers.emails.json", "output/found.emails.json" to "output/aggregate.txt"');
{
"username": "twolfson",
"password": "your_password",
"organization": "Ensighten",
"githubFilters": [
"Ensighten/CSS-Query", "Ensighten/JSonParser", "Ensighten/SQLParser",
"Ensighten/SQLParser-node.js", "Ensighten/node-mysql",
"Ensighten/Mason.js", "Ensighten/Sauron",
"Ensighten/crossbones", "Ensighten/Kizzy",
"Ensighten/cookie.js", "Ensighten/argspecjs",
"Ensighten/gowiththeflow.js", "Ensighten/yarnify",
"Ensighten/jqueryp", "Ensighten/Builder",
"Ensighten/Halo", "Ensighten/Halo.extras",
"Ensighten/JSON-js", "Ensighten/todomvc", "Ensighten/glu"
],
"usernameFilters": ["github/thatguy"]
}
Copyright (c) 2013 Todd Wolfson
Permission is hereby granted, free of charge, to any person
obtaining a copy of this software and associated documentation
files (the "Software"), to deal in the Software without
restriction, including without limitation the rights to use,
copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following
conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.
// Load in module dependencies
var fs = require('fs'),
assert = require('assert'),
async = require('async'),
GitHub = require('github'),
_ = require('underscore'),
github = new GitHub({version: '3.0.0'}),
config = require('./config');
// Generate an output directory
try {
  fs.mkdirSync('output/');
} catch (e) {
  // An already-existing directory is the expected case on re-runs; any other
  // failure would break the later writes into `output/`, so surface it now
  if (e.code !== 'EEXIST') { throw e; }
}

// Load in replay for development purposes
require('replay');

// Authenticate github with the basic credentials from `config`
github.authenticate({
  type: 'basic',
  username: config.username,
  password: config.password
});
// Followers scrape (currently switched off via the hard-coded toggle below)
// Looks up the followers of `config.username`, fetches each follower's
// profile, and writes two JSON maps keyed by "github/<login>":
// one of emails and one of profile URLs for users without an email.
if (false) {
  github.user.getFollowers({user: config.username}, function handleFollowers (err, res) {
    // Any failure here is fatal for the script
    if (err) { throw err; }

    // Copy the response into a plain array before mapping over it
    var followers = Array.prototype.slice.call(res);

    async.map(followers, function fetchFollowerProfile (follower, next) {
      github.user.getFrom({user: follower.login}, next);
    }, function saveFollowerProfiles (err, profiles) {
      // Any failure while fetching profiles is fatal as well
      if (err) { throw err; }

      // Sort profiles into those exposing an email and those that do not
      var withEmail = {};
      var withoutEmail = {};
      profiles.forEach(function sortProfile (profile) {
        var id = 'github/' + profile.login;
        var address = profile.email;
        if (address) {
          withEmail[id] = address;
          return;
        }
        withoutEmail[id] = profile.html_url;
      });

      // Persist both maps
      fs.writeFileSync('output/followers.emails.json', JSON.stringify(withEmail, null, 4));
      fs.writeFileSync('output/followers.non-emails.json', JSON.stringify(withoutEmail, null, 4));

      // Notify myself
      console.log('Wrote out "output/followers.emails.json" and "output/followers.non-emails.json"');
    });
  });
}
// If we want to pull down the watchers and stargazers
if (true) {
async.parallel([
function grabAllUserRepos (done) {
// Pull down all pages of user repos
var repos = [],
pageCount = 1,
lastRes;
async.doWhilst(function grabNextUserRepos (cb) {
// Pull down the current repo page
github.repos.getFromUser({
user: config.username,
page: pageCount
}, function concatUserRepos (err, res) {
// If there is an error, callback with it
if (err) { return cb(err); }
// Otherwise, concatenate the response
repos.push.apply(repos, res);
// Save the response for later
lastRes = res;
// Callback
cb();
});
}, function isNotLastRes () {
// Bump the page count
pageCount += 1;
// Return if there are any more pages
return lastRes.meta.link.indexOf('rel="next"') > -1;
}, function handleUserRepos (err) {
// Callback with error ad repos
done(err, repos);
});
},
// WARNING: THIS WAS COPY/PASTED. THIS SCRIPT IS HIGHLY UNMAINTAINABLE.
// Pull down all pages of repos belonging to `config.organization`, then drop
// any repo whose `full_name` is listed in `config.githubFilters`
// `done(err, repos)` receives the filtered list
function grabAllOrganizationRepos (done) {
  var orgRepos = [];
  var page = 1;
  var latestRes;

  // Fetch the current page and append its repos to the running list
  function fetchOrgRepoPage (next) {
    github.repos.getFromOrg({
      org: config.organization,
      page: page
    }, function appendOrgRepos (err, res) {
      if (err) { return next(err); }
      Array.prototype.push.apply(orgRepos, res);
      // Remember the response so `hasNextPage` can inspect its `meta`
      latestRes = res;
      next();
    });
  }

  // Advance the page counter and report whether the last response
  // advertised a `rel="next"` link
  function hasNextPage () {
    page += 1;
    var link = (latestRes.meta || {}).link || '';
    return link.indexOf('rel="next"') !== -1;
  }

  // Apply the configured repo filters and hand everything back
  function finishOrgRepos (err) {
    var keptRepos = orgRepos.filter(function isNotFiltered (repo) {
      return config.githubFilters.indexOf(repo.full_name) < 0;
    });
    done(err, keptRepos);
  }

  async.doWhilst(fetchOrgRepoPage, hasNextPage, finishOrgRepos);
}
], function handleUserAndOrgRepos (err, resArr) {
// If there is an error, throw it
if (err) { throw err; }
// Otherwise, join together the results
var repos = [].concat.apply([], resArr);
// If we are pulling down the stargazers
if (false) {
// Pull down the stargazers of all the repos
async.map(repos, function getRepoStargazers (repo, cb) {
github.repos.getStargazers({
user: repo.owner.login,
repo: repo.name,
page: 1,
per_page: 100
}, cb);
}, function collectRepoStargazers (err, stargazerArr) {
// Assert no pages left behind
assert(!(stargazerArr.map(function (item) { return item.meta.link || ''; }).join('')), 'Deal with multiple pages of stargazers #fwp');
// Move the stargazers into one giant array
var stargazers = [].concat.apply([], stargazerArr);
// Unique-ify stargazers
stargazers = _.unique(stargazers, function (stargazer) {
return stargazer.id;
});
// DEV: Don't need to worry about filtering out users since we have REPLAY
// Pull down each users info
// WARNING: COPY/PASTE FROM PREVIOUS SECTION
async.map(stargazers, function getStargazer (user, cb) {
github.user.getFrom({user: user.login}, cb);
}, function interpretStargazerRes (err, resArr) {
// If there were an errors, throw them
if (err) { throw err; }
// Break up users into two parts, emails and non-emails
var emails = {},
nonEmails = {};
resArr.forEach(function saveStargazer (user) {
var key = 'github/' + user.login,
email = user.email;
if (email) {
emails[key] = email;
} else {
nonEmails[key] = user.html_url;
}
});
// Save the emails and nonEmails
fs.writeFileSync('output/stargazers.emails.json', JSON.stringify(emails, null, 4));
fs.writeFileSync('output/stargazers.non-emails.json', JSON.stringify(nonEmails, null, 4));
// Notify myself
console.log('Wrote out "output/stargazers.emails.json" and "output/stargazers.non-emails.json"');
});
});
}
// If we are pulling down the watchers
// WARNING: COPY/PASTE FROM PREVIOUS SECTION
if (true) {
// Pull down the watchers of all the repos
async.map(repos, function getRepoWatchers (repo, cb) {
github.repos.getWatchers({
user: repo.owner.login,
repo: repo.name,
page: 1,
per_page: 100
}, cb);
}, function collectRepoWatchers (err, watcherArr) {
// Assert no pages left behind
assert(!(watcherArr.map(function (item) { return item.meta.link || ''; }).join('')), 'Deal with multiple pages of watchers #fwp');
// Move the watchers into one giant array
var watchers = [].concat.apply([], watcherArr);
// Unique-ify watchers
watchers = _.unique(watchers, function (watcher) {
return watcher.id;
});
// DEV: Don't need to worry about filtering out users since we have REPLAY
// Pull down each users info
// WARNING: COPY/PASTE FROM PREVIOUS SECTION
async.map(watchers, function getWatcher (user, cb) {
github.user.getFrom({user: user.login}, cb);
}, function interpretWatcherRes (err, resArr) {
// If there were an errors, throw them
if (err) { throw err; }
// Break up users into two parts, emails and non-emails
var emails = {},
nonEmails = {};
resArr.forEach(function savewatcher (user) {
var key = 'github/' + user.login,
email = user.email;
if (email) {
emails[key] = email;
} else {
nonEmails[key] = user.html_url;
}
});
// Save the emails and nonEmails
fs.writeFileSync('output/watchers.emails.json', JSON.stringify(emails, null, 4));
fs.writeFileSync('output/watchers.non-emails.json', JSON.stringify(nonEmails, null, 4));
// Notify myself
console.log('Wrote out "output/watchers.emails.json" and "output/watchers.non-emails.json"');
});
});
}
});
}
{
"name": "gist-github-followers",
"description": "Scrape all GitHub watchers, stargazers, and followers",
"version": "0.1.0",
"homepage": "",
"author": {
"name": "Todd Wolfson",
"email": "[email protected]",
"url": "http://twolfson.com/"
},
"repository": {
"type": "git",
"url": "https://gist.github.com/5803105.git"
},
"bugs": {
"url": ""
},
"licenses": [
{
"type": "MIT",
"url": "/blob/master/LICENSE-MIT"
}
],
"main": "lib/gist-github-followers",
"engines": {
"node": ">= 0.6.0"
},
"scripts": {},
"devDependencies": {},
"keywords": [],
"dependencies": {
"github": "~0.1.10",
"replay": "~1.7.0",
"async": "~0.2.9",
"underscore": "~1.4.4"
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment