Last active
September 17, 2021 08:23
-
-
Save twolfson/5803105 to your computer and use it in GitHub Desktop.
Scrape all GitHub watchers, stargazers, and followers
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
node_modules/ | |
fixtures/ | |
output/ | |
config.json |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Load in module dependencies | |
var fs = require('fs'), | |
assert = require('assert'), | |
_ = require('underscore'), | |
config = require('./config'); | |
// Aggregate script: merges the per-source email maps written under `output/`
// into a single newline-delimited list at `output/aggregate.txt`.

// Generate an output directory
try {
  fs.mkdirSync('output/');
} catch (err) {
  // The directory already existing is expected on re-runs; any other failure
  // (permissions, read-only disk, ...) would break the write below, so surface it
  if (err.code !== 'EEXIST') { throw err; }
}

// Read in our files. Each one maps a user key to an email; on a key
// collision the later file wins because `_.extend` copies left to right.
var userObj = _.extend(
  {},
  require('./output/followers.emails.json'),
  require('./output/stargazers.emails.json'),
  require('./output/watchers.emails.json'),
  require('./output/found.emails.json'));

// Tolerate a config that does not define any username filters
var usernameFilters = config.usernameFilters || [];

// Collect emails into an array
var emails = [];
_.each(userObj, function (email, username) {
  // If the username is in our filters, skip it
  // DEV: A plain `return` is used since underscore's `each` ignores the return value
  if (usernameFilters.indexOf(username) > -1) {
    return;
  }

  // Otherwise, save the email
  emails.push(email);
});

// Write out the emails to a file
fs.writeFileSync('output/aggregate.txt', emails.join('\n'));

// Notify myself
console.log('Wrote out emails from "output/followers.emails.json", "output/stargazers.emails.json", "output/watchers.emails.json", "output/found.emails.json" to "output/aggregate.txt"');
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"username": "twolfson", | |
"password": "your_password", | |
"organization": "Ensighten", | |
"githubFilters": [ | |
"Ensighten/CSS-Query", "Ensighten/JSonParser", "Ensighten/SQLParser", | |
"Ensighten/SQLParser-node.js", "Ensighten/node-mysql", | |
"Ensighten/Mason.js", "Ensighten/Sauron", | |
"Ensighten/crossbones", "Ensighten/Kizzy", | |
"Ensighten/cookie.js", "Ensighten/argspecjs", | |
"Ensighten/gowiththeflow.js", "Ensighten/yarnify", | |
"Ensighten/jqueryp", "Ensighten/Builder", | |
"Ensighten/Halo", "Ensighten/Halo.extras", | |
"Ensighten/JSON-js", "Ensighten/todomvc", "Ensighten/glu" | |
], | |
"usernameFilters": ["github/thatguy"] | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Copyright (c) 2013 Todd Wolfson | |
Permission is hereby granted, free of charge, to any person | |
obtaining a copy of this software and associated documentation | |
files (the "Software"), to deal in the Software without | |
restriction, including without limitation the rights to use, | |
copy, modify, merge, publish, distribute, sublicense, and/or sell | |
copies of the Software, and to permit persons to whom the | |
Software is furnished to do so, subject to the following | |
conditions: | |
The above copyright notice and this permission notice shall be | |
included in all copies or substantial portions of the Software. | |
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
OTHER DEALINGS IN THE SOFTWARE. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Load in module dependencies | |
var fs = require('fs'), | |
assert = require('assert'), | |
async = require('async'), | |
GitHub = require('github'), | |
_ = require('underscore'), | |
github = new GitHub({version: '3.0.0'}), | |
config = require('./config'); | |
// Generate an output directory
try {
  fs.mkdirSync('output/');
} catch (err) {
  // The directory already existing is expected on re-runs; any other failure
  // (permissions, read-only disk, ...) would break the later writes, so surface it
  if (err.code !== 'EEXIST') { throw err; }
}

// Load in replay for development purposes
require('replay');

// Authenticate github with the basic-auth credentials from the config file
github.authenticate({
  type: 'basic',
  username: config.username,
  password: config.password
});
// Development toggle: flip to `true` to scrape the followers of `config.username`
if (false) {
  // Request the followers of the configured user
  github.user.getFollowers({user: config.username}, function handleFollowers (err, res) {
    // Bail out loudly on a failed request
    if (err) { throw err; }

    // Copy the response into a plain array and look up every follower's profile
    var followers = Array.prototype.slice.call(res);
    async.map(followers, function fetchProfile (follower, next) {
      github.user.getFrom({user: follower.login}, next);
    }, function partitionProfiles (err, profiles) {
      // Bail out loudly if any profile lookup failed
      if (err) { throw err; }

      // Split the profiles by whether they expose an email address;
      // profiles without one are recorded with their profile URL instead
      var withEmail = {};
      var withoutEmail = {};
      for (var i = 0; i < profiles.length; i += 1) {
        var profile = profiles[i];
        var userKey = 'github/' + profile.login;
        if (profile.email) {
          withEmail[userKey] = profile.email;
        } else {
          withoutEmail[userKey] = profile.html_url;
        }
      }

      // Persist both maps as pretty-printed JSON
      fs.writeFileSync('output/followers.emails.json', JSON.stringify(withEmail, null, 4));
      fs.writeFileSync('output/followers.non-emails.json', JSON.stringify(withoutEmail, null, 4));

      // Notify myself
      console.log('Wrote out "output/followers.emails.json" and "output/followers.non-emails.json"');
    });
  });
}
// If we want to pull down the watchers and stargazers
if (true) { | |
async.parallel([ | |
function grabAllUserRepos (done) { | |
// Pull down all pages of user repos | |
var repos = [], | |
pageCount = 1, | |
lastRes; | |
async.doWhilst(function grabNextUserRepos (cb) { | |
// Pull down the current repo page | |
github.repos.getFromUser({ | |
user: config.username, | |
page: pageCount | |
}, function concatUserRepos (err, res) { | |
// If there is an error, callback with it | |
if (err) { return cb(err); } | |
// Otherwise, concatenate the response | |
repos.push.apply(repos, res); | |
// Save the response for later | |
lastRes = res; | |
// Callback | |
cb(); | |
}); | |
}, function isNotLastRes () { | |
// Bump the page count | |
pageCount += 1; | |
// Return if there are any more pages | |
return lastRes.meta.link.indexOf('rel="next"') > -1; | |
}, function handleUserRepos (err) { | |
// Callback with error ad repos | |
done(err, repos); | |
}); | |
}, | |
// WARNING: THIS WAS COPY/PASTED. THIS SCRIPT IS HIGHLY UNMAINTAINABLE.
/**
 * Pull down every page of repos for `config.organization`, dropping any
 * repo whose `full_name` is listed in `config.githubFilters`.
 * @param {Function} done Error-first callback, invoked with `(err, repos)`
 */
function grabAllOrganizationRepos (done) {
  var orgRepos = [];
  var page = 1;
  var latestPage;

  // Request the current page and append its repos to the running list
  function fetchPage (next) {
    github.repos.getFromOrg({org: config.organization, page: page}, function onPage (err, res) {
      // Stop paging on the first failed request
      if (err) { return next(err); }

      Array.prototype.push.apply(orgRepos, res);

      // Remember the response so the paging check can inspect its metadata
      latestPage = res;
      next();
    });
  }

  // Advance the page counter and report whether the last response links to another page
  function hasNextPage () {
    page += 1;
    var link = (latestPage.meta || {}).link || '';
    return link.indexOf('rel="next"') !== -1;
  }

  // Apply the repo filters and hand back the error and repos
  function finish (err) {
    var kept = orgRepos.filter(function isNotFiltered (repo) {
      return config.githubFilters.indexOf(repo.full_name) < 0;
    });
    done(err, kept);
  }

  async.doWhilst(fetchPage, hasNextPage, finish);
}
], function handleUserAndOrgRepos (err, resArr) { | |
// If there is an error, throw it | |
if (err) { throw err; } | |
// Otherwise, join together the results | |
var repos = [].concat.apply([], resArr); | |
// If we are pulling down the stargazers | |
if (false) { | |
// Pull down the stargazers of all the repos | |
async.map(repos, function getRepoStargazers (repo, cb) { | |
github.repos.getStargazers({ | |
user: repo.owner.login, | |
repo: repo.name, | |
page: 1, | |
per_page: 100 | |
}, cb); | |
}, function collectRepoStargazers (err, stargazerArr) { | |
// Assert no pages left behind | |
assert(!(stargazerArr.map(function (item) { return item.meta.link || ''; }).join('')), 'Deal with multiple pages of stargazers #fwp'); | |
// Move the stargazers into one giant array | |
var stargazers = [].concat.apply([], stargazerArr); | |
// Unique-ify stargazers | |
stargazers = _.unique(stargazers, function (stargazer) { | |
return stargazer.id; | |
}); | |
// DEV: Don't need to worry about filtering out users since we have REPLAY | |
// Pull down each users info | |
// WARNING: COPY/PASTE FROM PREVIOUS SECTION | |
async.map(stargazers, function getStargazer (user, cb) { | |
github.user.getFrom({user: user.login}, cb); | |
}, function interpretStargazerRes (err, resArr) { | |
// If there were an errors, throw them | |
if (err) { throw err; } | |
// Break up users into two parts, emails and non-emails | |
var emails = {}, | |
nonEmails = {}; | |
resArr.forEach(function saveStargazer (user) { | |
var key = 'github/' + user.login, | |
email = user.email; | |
if (email) { | |
emails[key] = email; | |
} else { | |
nonEmails[key] = user.html_url; | |
} | |
}); | |
// Save the emails and nonEmails | |
fs.writeFileSync('output/stargazers.emails.json', JSON.stringify(emails, null, 4)); | |
fs.writeFileSync('output/stargazers.non-emails.json', JSON.stringify(nonEmails, null, 4)); | |
// Notify myself | |
console.log('Wrote out "output/stargazers.emails.json" and "output/stargazers.non-emails.json"'); | |
}); | |
}); | |
} | |
// If we are pulling down the watchers | |
// WARNING: COPY/PASTE FROM PREVIOUS SECTION | |
if (true) { | |
// Pull down the watchers of all the repos | |
async.map(repos, function getRepoWatchers (repo, cb) { | |
github.repos.getWatchers({ | |
user: repo.owner.login, | |
repo: repo.name, | |
page: 1, | |
per_page: 100 | |
}, cb); | |
}, function collectRepoWatchers (err, watcherArr) { | |
// Assert no pages left behind | |
assert(!(watcherArr.map(function (item) { return item.meta.link || ''; }).join('')), 'Deal with multiple pages of watchers #fwp'); | |
// Move the watchers into one giant array | |
var watchers = [].concat.apply([], watcherArr); | |
// Unique-ify watchers | |
watchers = _.unique(watchers, function (watcher) { | |
return watcher.id; | |
}); | |
// DEV: Don't need to worry about filtering out users since we have REPLAY | |
// Pull down each users info | |
// WARNING: COPY/PASTE FROM PREVIOUS SECTION | |
async.map(watchers, function getWatcher (user, cb) { | |
github.user.getFrom({user: user.login}, cb); | |
}, function interpretWatcherRes (err, resArr) { | |
// If there were an errors, throw them | |
if (err) { throw err; } | |
// Break up users into two parts, emails and non-emails | |
var emails = {}, | |
nonEmails = {}; | |
resArr.forEach(function savewatcher (user) { | |
var key = 'github/' + user.login, | |
email = user.email; | |
if (email) { | |
emails[key] = email; | |
} else { | |
nonEmails[key] = user.html_url; | |
} | |
}); | |
// Save the emails and nonEmails | |
fs.writeFileSync('output/watchers.emails.json', JSON.stringify(emails, null, 4)); | |
fs.writeFileSync('output/watchers.non-emails.json', JSON.stringify(nonEmails, null, 4)); | |
// Notify myself | |
console.log('Wrote out "output/watchers.emails.json" and "output/watchers.non-emails.json"'); | |
}); | |
}); | |
} | |
}); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"name": "gist-github-followers", | |
"description": "Scrape all GitHub watchers, stargazers, and followers",
"version": "0.1.0", | |
"homepage": "", | |
"author": { | |
"name": "Todd Wolfson", | |
"email": "[email protected]", | |
"url": "http://twolfson.com/" | |
}, | |
"repository": { | |
"type": "git", | |
"url": "https://gist.github.com/5803105.git"
}, | |
"bugs": { | |
"url": "" | |
}, | |
"licenses": [ | |
{ | |
"type": "MIT", | |
"url": "/blob/master/LICENSE-MIT" | |
} | |
], | |
"main": "lib/gist-github-followers", | |
"engines": { | |
"node": ">= 0.6.0" | |
}, | |
"scripts": {}, | |
"devDependencies": {}, | |
"keywords": [], | |
"dependencies": { | |
"github": "~0.1.10", | |
"replay": "~1.7.0", | |
"async": "~0.2.9", | |
"underscore": "~1.4.4" | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment