Skip to content

Instantly share code, notes, and snippets.

@Jimbly
Last active August 29, 2015 13:59
Show Gist options
  • Save Jimbly/10771051 to your computer and use it in GitHub Desktop.
Save Jimbly/10771051 to your computer and use it in GitHub Desktop.
Page scraping with login
var request = require('request').defaults({ jar: true });
var config = require('./config.json');
var log = console.log.bind(console);
/**
 * Adapts a simplified `(res, body)` callback to the `(err, res, body)`
 * callback shape that `request` invokes.
 *
 * The returned handler behaves as follows:
 *  - 302 with a Location header: logs the redirect, then issues a GET for the
 *    resolved target. A redirect from a URL containing "login" to another URL
 *    containing "login" is treated as a failed login and terminates the
 *    process with exit code -1.
 *  - request error: a warning is printed and `cb` is NOT called.
 *  - any other non-200 status: the status code and response headers are
 *    printed and `cb` is NOT called.
 *  - 200: `cb(res, body)` is called.
 *
 * @param {string} orig_url URL of the request this handler is attached to;
 *     used for log messages and as the base for relative redirect targets.
 * @param {function(Object, *)} cb Called with `(res, body)` on a 200 response.
 * @returns {function(*, Object, *)} Handler suitable as a `request` callback.
 */
function wrapcb(orig_url, cb) {
  return function (err, res, body) {
    // Only treat the response as a redirect when there is a target to follow;
    // a 302 without a Location header falls through to the status warning.
    var location = res && res.statusCode === 302 && res.headers && res.headers.location;
    if (location) {
      var url = require('url').resolve(orig_url, location);
      log('Redirect from ' + orig_url + ' to ' + url);
      if (orig_url.indexOf('login') !== -1 && url.indexOf('login') !== -1) {
        log('Redirect from login back to login, probably invalid password, check config.json');
        log('You may have made too many login attempts and will have to wait a\nwhile before trying again.');
        process.exit(-1);
      }
      // Re-wrap the callback: `request` invokes it as (err, res, body), while
      // `cb` expects (res, body). Passing `cb` directly would shift the
      // arguments and skip the error/status checks for the redirect target.
      return request.get(url, wrapcb(url, cb));
    }
    if (err) {
      console.warn('Error getting ' + orig_url + ': ' + err);
    } else if (res.statusCode !== 200) {
      console.warn('Status code ' + res.statusCode);
      log(res.headers);
    } else {
      cb(res, body);
    }
  };
}
/**
 * Logs in to mwomercs.com and then invokes `next`.
 *
 * First GETs the login page, then POSTs the credentials from `config`
 * (`config.email`, `config.password`) as a form to the login endpoint.
 * The GET response itself is discarded; `request` is configured with a cookie
 * jar, so presumably this step exists to pick up session cookies before
 * posting -- TODO confirm against the site's behavior.
 *
 * If the initial GET fails, a warning is printed and `next` is NOT called.
 * POST failures are handled by `wrapcb`.
 *
 * @param {function()} next Called with no arguments once the login POST
 *     has completed successfully.
 */
function login(next) {
  var login_page = 'https://mwomercs.com/login';
  var login_action = 'https://mwomercs.com/do/login';
  log('Logging in...');
  request.get(login_page, function (err) {
    if (err) {
      // Do not attempt the POST when the login page could not even be
      // fetched; report it the same way wrapcb reports request errors.
      console.warn('Error getting ' + login_page + ': ' + err);
      return;
    }
    request.post(login_action,
      {form: {email: config.email, password: config.password}},
      wrapcb(login_action, function () {
        // NOTE(review): is_logged_in is not declared in this function; it is
        // assumed to be declared at file scope elsewhere -- confirm.
        is_logged_in = true;
        next();
      })
    );
  });
}
/**
 * Downloads `url` and writes the response body to
 * `userdata + file_dir + '/' + output + '.html'`.
 *
 * If the returned page is the login page (detected by its <title>), the
 * session is assumed to have expired: `login` is called and the same fetch is
 * retried afterwards.
 *
 * The "Saved ..." status message is emitted only after the write has
 * succeeded; a failed write is reported with a warning. In both cases the
 * write error (or null) is forwarded to `next`.
 *
 * NOTE(review): `fs`, `file_dir`, `userdata` and `status` are not defined in
 * this function; they are assumed to be provided at file scope -- confirm.
 *
 * @param {string} url Page to fetch.
 * @param {string} output Base name of the output file ('.html' is appended).
 * @param {function(?Error)} next Called after the write attempt completes.
 */
function getPage(url, output, next) {
  request.get(url,
    wrapcb(url, function (res, body) {
      if (body.indexOf('<title>MWO: Login</title>') !== -1) {
        log('No longer logged in, trying again!');
        login(getPage.bind(undefined, url, output, next));
        return;
      }
      var file_name = file_dir + '/' + output + '.html';
      fs.writeFile(userdata + file_name, body, function (err) {
        if (err) {
          console.warn('Error saving ' + url + ' to ' + file_name + ': ' + err);
        } else {
          // Report success only once the file has actually been written.
          status('Saved ' + url + ' to ' + file_name);
        }
        next(err);
      });
    })
  );
}
// Entry point: log in, then download the profile stats page and report
// completion once getPage has finished.
login(function onLoggedIn() {
  var stats_url = 'https://mwomercs.com/profile/stats';
  var output_name = 'stats.txt';
  getPage(stats_url, output_name, function onSaved() {
    log('done!');
  });
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment