-
-
Save you21979/11368682 to your computer and use it in GitHub Desktop.
スクレイピング練習
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
http://tips.hecomi.com/entry/20130108/1357653054
npm search phantomjs
npm install phantom cheerio
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var phantom = require('phantom'); | |
var cheerio = require('cheerio'); | |
// Scraping exercise: start PhantomJS, log in to Hatena through a fixed
// sequence of page steps, then read the point count from the "my" page.
phantom.create(function(ph) {
  ph.createPage(function(page) {
    // Once a page is initialized, register a DOMContentLoaded listener inside
    // it that reports back through window.callPhantom (received by the
    // 'onCallback' handler set below).
    page.set('onInitialized', function() {
      page.evaluate(function() {
        var notifyLoaded = function() {
          window.callPhantom('DOMContentLoaded');
        };
        document.addEventListener('DOMContentLoaded', notifyLoaded, false);
      });
    });

    // Runs the registered step functions one after another, advancing to the
    // next step each time a page reports that it has loaded.
    function StepRunner(steps) {
      this.queue = steps;
      this.init();
    }

    // Call next() whenever the page reports 'DOMContentLoaded'.
    StepRunner.prototype.init = function() {
      var runner = this;
      page.set('onCallback', function(message) {
        if (message === 'DOMContentLoaded') {
          runner.next();
        }
      });
    };

    // Take one step off the front of the queue and run it. When the queue is
    // empty, unregister the 'onCallback' handler instead.
    StepRunner.prototype.next = function() {
      var step = this.queue.shift();
      if (step === undefined) {
        page.set('onCallback', undefined);
        return;
      }
      step();
    };

    // Step 1: open the login page (its load triggers the next step).
    function openLoginPage() {
      console.log('ログイン処理');
      page.open('https://www.hatena.ne.jp/login');
    }

    // Step 2: fill in the login form inside the page and submit it (the
    // resulting page load triggers the next step).
    function submitLoginForm() {
      console.log('ログイン画面');
      page.evaluate(function() {
        var nameField = document.getElementById('login-name');
        var passwordField = document.querySelector('.password');
        nameField.value = 'はてなの ID';
        passwordField.value = 'パスワード';
        document.querySelector('form').submit();
      });
    }

    // Step 3: after a 2000 ms delay, open the "my" page.
    function openMyPage() {
      console.log('ログイン後画面');
      var delayMs = 2000;
      setTimeout(function() {
        page.open('http://www.hatena.ne.jp/my');
      }, delayMs);
    }

    // Step 4: fetch the document's HTML, parse it with cheerio, print the
    // point count, and shut PhantomJS down.
    function reportPoints() {
      console.log('iframe 内');
      page.evaluate(function() {
        var root = document.getElementsByTagName('html')[0];
        return root.innerHTML;
      }, function(html) {
        var $ = cheerio.load(html);
        var firstModule = $('.hatena-module').eq(0);
        var point = firstModule.find('.count').text();
        console.log('ポイントは後', point, 'point だよ!');
        // Do not forget to exit, otherwise the PhantomJS process stays alive.
        ph.exit();
      });
    }

    // Steps executed in order; the first one is kicked off manually.
    var runner = new StepRunner([
      openLoginPage,
      submitLoginForm,
      openMyPage,
      reportPoints
    ]);
    runner.next();
  });
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment