Skip to content

Instantly share code, notes, and snippets.

@akirattii
Last active March 15, 2017 04:24
Show Gist options
  • Save akirattii/9051bb396d7141147810dd41ea58305e to your computer and use it in GitHub Desktop.
Save akirattii/9051bb396d7141147810dd41ea58305e to your computer and use it in GitHub Desktop.
How to scrape a member page that requires login.
/**
$ node member-page-scraping.js <loginId> <password>
*/
var request = require("request");
request = request.defaults({ jar: true });
var encoding = require('encoding-japanese');
const cheerio = require("cheerio");
const Step = require("step");
var config = require("config");
// Credentials are read from the command line:
//   node member-page-scraping.js <loginId> <password>
var loginId = process.argv[2];
var password = process.argv[3];
var jar; // Cookie jar; assigned in login() and sent with the member-page request
var uri; // NOTE(review): not referenced anywhere in the visible code
// TODO: Set this to the character encoding of the page being scraped
const ENCODE = "EUC-JP";
/**
 * Logs in with the command-line credentials, then fetches the member page
 * and prints its <body> HTML to stdout.
 *
 * A fresh cookie jar is stored in the module-level `jar` so that the
 * follow-up member-page request reuses the cookies set by the login response.
 *
 * The login POST is issued exactly once, as the first stage of the Step
 * chain. (It used to be sent a second time by a wrapping request whose
 * error was ignored.)
 *
 * @throws {Error} Rethrows the error from the final stage when the login
 *   request or the member-page request fails.
 */
function login() {
  jar = request.jar();
  const options = {
    url: 'https://www.example',
    method: "POST",
    headers: {},
    json: true,
    form: {
      loginId,
      password
    },
    jar,
  };
  // Step invokes each stage with `this` bound to the "next" callback, so the
  // stages must stay regular functions (not arrow functions).
  Step(
    function sendLogin() {
      // Login
      request(options, this);
    },
    function openMemberPage(err, resp, body) {
      if (err) throw err;
      // Move to the member page now that the jar holds the session cookies
      memberPageRequest("https://mypage.example", this);
    },
    function printMemberPage(err, $body) {
      if (err) throw err;
      console.log("html:", $body.html());
    }
  ); // Step
}
/**
 * Fetches a page that requires a logged-in session, converts it from ENCODE
 * to a Unicode string and passes the parsed <body> to the callback.
 *
 * The request is sent with the module-level cookie `jar`.
 *
 * @param {string} url - Address of the page to fetch.
 * @param {function} [cb] - Called once: with `(err)` when the request fails,
 *   otherwise with `(null, $body)` where `$body` is the cheerio selection
 *   for "body".
 */
function memberPageRequest(url, cb) {
  console.log("url:", url);
  // NOTE(review): the Content-Type value and `json: true` are kept as-is;
  // confirm whether the target server actually needs them for an HTML GET.
  const options = {
    url,
    method: "GET",
    headers: {
      "Content-Type": "content=text/html; charset=" + ENCODE
    },
    json: true,
    jar,
    encoding: null, // for converting EUC-JP to unicode
  };
  request(options, function(err, response, body) {
    if (err) {
      // Stop here: there is no body to decode, and the callback must not
      // be invoked a second time below.
      if (cb) cb(err);
      return;
    }
    // converts EUC-JP to unicode
    const unicodeArr = encoding.convert(body, {
      from: ENCODE,
      to: 'UNICODE'
    });
    const html = encoding.codeToString(unicodeArr);
    const $ = cheerio.load(html);
    if (cb) cb(null, $("body"));
  });
}
// Entry point: start the login -> member page -> print sequence.
login();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment