Created
December 13, 2016 16:35
-
-
Save ko31/267392c6d1fb32f1aa3d496e020b1266 to your computer and use it in GitHub Desktop.
【PHP】将棋連盟棋士データベースより棋士情報をスクレイピングするクラス
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
require_once 'vendor/autoload.php'; | |
use Goutte\Client; | |
/**
 * Scrapes a professional player's profile page on the Japan Shogi
 * Association web site and returns the extracted fields.
 */
class Scraper {
    /** Scheme and host used both for the request and to absolutise image paths. */
    const BASE_URL = 'http://www.shogi.or.jp';

    /** HTTP client / crawler factory (Goutte). */
    protected $client;

    public function __construct() {
        $this->client = new Client();
    }

    /**
     * Fetch one player page and extract the profile data.
     *
     * Keys that may be present in the result: name, image, no, birthday,
     * birthplace, ryuou, junni. A key is only set when the corresponding
     * element was found on the page. All text values are trimmed.
     *
     * @param int|string $no     Identifier used in the page URL (/player/pro/{no}.html).
     * @param string     $format 'json' to get a JSON string; anything else returns an array.
     *
     * @return array|string|false Extracted data (array, or JSON string when
     *                            $format is 'json'), or false when no name
     *                            could be extracted from the page.
     */
    public function getKishi($no, $format = 'array' ) {
        $data = array();
        $baseUrl = self::BASE_URL;

        // GET the player page.
        $crawler = $this->client->request('GET', $baseUrl . '/player/pro/' . $no . '.html');

        // Name: first <span> inside the name area. Crawler::text() throws on
        // an empty node list, so check the match count first.
        $crawler->filter('div.nameArea')->each(function ($node) use (&$data) {
            $span = $node->filter('span');
            if ($span->count() > 0) {
                $data['name'] = trim($span->eq(0)->text());
            }
        });
        if (!$data) {
            // The association's site does not serve a 404 page, so a
            // successfully extracted name is the signal that the page exists.
            return false;
        }

        // Portrait image URL.
        $crawler->filter('div.imgArea img')->each(function ($node) use (&$data, $baseUrl) {
            $src = $node->attr('src');
            if ($src === null || $src === '') {
                return;
            }
            if (preg_match('#^(?:https?:)?//#i', $src)) {
                // Already absolute (or protocol-relative): do not prepend the host.
                $data['image'] = $src;
            } else {
                // Site-relative path; tolerate a missing leading slash.
                $data['image'] = $baseUrl . '/' . ltrim($src, '/');
            }
        });

        // Basic profile table: map each row's header label to an output key.
        $fields = array(
            '棋士番号' => 'no',
            '生年月日' => 'birthday',
            '出身地' => 'birthplace',
            '竜王戦' => 'ryuou',
            '順位戦' => 'junni',
        );
        $rows = $crawler->filter('div.uniqueLayoutElements03 table.tableElements02 tr');
        $rows->each(function ($node) use (&$data, $fields) {
            $th = $node->filter('th');
            $td = $node->filter('td');
            if ($th->count() === 0 || $td->count() === 0) {
                // Rows without both a header and a value cell carry no field;
                // skipping them avoids the exception text() raises on an empty list.
                return;
            }
            $label = trim($th->text());
            if (isset($fields[$label])) {
                $data[$fields[$label]] = trim($td->text());
            }
        });

        if ($format === 'json') {
            return json_encode($data);
        }
        return $data;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment