|
from .extractor import Extractor |
|
|
|
|
|
class BaseballDetailsExtractor(Extractor):
    """Extract the batting and pitching tables of one NPB game page.

    ``_extract`` queries ``self._soup`` with CSS selectors and returns a dict
    shaped like ``default_response()``. ``self._soup`` is supplied by the
    ``Extractor`` base class, which is defined outside this file (presumably
    a BeautifulSoup document built from the page at the given URL -- confirm
    against the base class).
    """

    # The dot in the host name is escaped so only the literal "npb.jp" matches.
    VALID_URL = r"https?://npb\.jp/bis/(?P<year>\d{4})/games/(s\d{8}\d+?\.html)"

    # Normalised header rows that replace the first row scraped from each
    # table. Kept as tuples and copied per team so results never share a list.
    _BATTER_HEADER = (
        "position",
        "name",
        "打数",
        "安打",
        "打点",
        "四球",
        "死球",
        "三振",
    )
    _PITCHER_HEADER = (
        "status",
        "name",
        "投回",
        "投回sub",
        "打者",
        "安打",
        "四球",
        "死球",
        "三振",
        "自責",
    )

    # ``td.gmcolorsub`` cells consumed per page: three per side (team name,
    # batting table, pitching table), interleaved away/home.
    _EXPECTED_SECTIONS = 6

    @staticmethod
    def default_response():
        """Return a fresh, empty result skeleton (new objects on every call)."""
        return {
            "date": "",
            "home": {"team": "", "batter": [], "pitcher": []},
            "away": {"team": "", "batter": [], "pitcher": []},
        }

    @staticmethod
    def _find_team_name(section):
        """Return the stripped text of the ``td.gmtblteam`` cell in *section*.

        Raises:
            ValueError: if *section* contains no ``td.gmtblteam`` element.
        """
        cell = section.select_one("td.gmtblteam")
        if cell is None:
            raise ValueError("team name cell 'td.gmtblteam' not found")
        return cell.text.strip()

    @staticmethod
    def _find_results(section, header):
        """Return the ``tr.gmstats`` rows of *section* as lists of cell texts.

        The first scraped row is replaced by a copy of *header*. When the
        section has no rows at all, the result is just the header row instead
        of an ``IndexError``.
        """
        rows = [
            [cell.text for cell in row.select("td")]
            for row in section.select("tr.gmstats")
        ]
        return [list(header)] + rows[1:]

    def _extract(self):
        """Parse the game page held in ``self._soup``.

        Returns:
            dict: ``{"date": str, "home": {...}, "away": {...}}`` where each
            side holds ``"team"`` (str) plus ``"batter"`` and ``"pitcher"``
            (lists of rows, the first row being the normalised header).

        Raises:
            ValueError: if the title, the table container, or the expected
                six ``td.gmcolorsub`` sections are missing. These cases
                previously surfaced as ``AttributeError`` / ``AssertionError``;
                the explicit check also survives ``python -O``, which strips
                ``assert`` statements.
        """
        response = self.default_response()

        container = self._soup.select_one("div#gmdivtbl")
        title = self._soup.select_one("div#gmdivtitle")
        if title is None:
            raise ValueError("game title 'div#gmdivtitle' not found")
        if container is None:
            raise ValueError("stats container 'div#gmdivtbl' not found")
        response["date"] = title.text.strip()

        sections = container.select("td.gmcolorsub")
        if len(sections) != self._EXPECTED_SECTIONS:
            raise ValueError(
                "expected %d 'td.gmcolorsub' sections, found %d"
                % (self._EXPECTED_SECTIONS, len(sections))
            )

        # Even-indexed sections belong to the away team, odd-indexed ones to
        # the home team; within a side the order is name, batting, pitching.
        sides = zip(("away", "home"), (sections[::2], sections[1::2]))
        for side, (name_cell, batting, pitching) in sides:
            response[side].update(
                {
                    "team": self._find_team_name(name_cell),
                    "batter": self._find_results(batting, self._BATTER_HEADER),
                    "pitcher": self._find_results(pitching, self._PITCHER_HEADER),
                }
            )

        return response
|
|
|
|
|
if __name__ == "__main__":
    # Manual check: run the extractor on one known game URL and pretty-print
    # the resulting dict.
    from pprint import pprint

    url = "https://npb.jp/bis/2021/games/s2021041000947.html"
    extractor = BaseballDetailsExtractor(url)
    pprint(extractor.extract())
|
|
|
# Example output (pprint of the dict returned for the URL above):
|
# |
|
# {'away': {'batter': [['position', 'name', '打数', '安打', '打点', '四球', '死球', '三振'], |
|
# ['(中)', '近本', '5', '0', '0', '0', '0', '0'], |
|
# ['(二)', '糸原', '4', '2', '0', '0', '0', '1'], |
|
# ['遊三', '山本', '0', '0', '0', '0', '0', '0'], |
|
# ['(一)', 'マルテ', '3', '0', '0', '1', '0', '1'], |
|
# ['走右', '熊谷', '0', '0', '0', '0', '0', '0'], |
|
# ['(三)一', '大山', '4', '1', '1', '0', '0', '1'], |
|
# ['(左)', 'サンズ', '4', '1', '2', '0', '0', '1'], |
|
# ['左', '板山', '0', '0', '0', '0', '0', '0'], |
|
# ['(右)', '佐藤輝', '4', '1', '0', '0', '0', '1'], |
|
# ['遊', '木浪', '0', '0', '0', '0', '0', '0'], |
|
# ['(捕)', '梅野', '4', '2', '0', '0', '0', '1'], |
|
# ['(遊)二', '中野', '4', '1', '1', '0', '0', '1'], |
|
# ['(投)', '青柳', '1', '0', '0', '0', '0', '0'], |
|
# ['打', '糸井', '1', '0', '0', '0', '0', '1'], |
|
# ['投', '岩崎', '0', '0', '0', '0', '0', '0'], |
|
# ['打', '原口', '1', '0', '0', '0', '0', '1'], |
|
# ['投', 'スアレス', '0', '0', '0', '0', '0', '0']], |
|
# 'pitcher': [['status', |
|
# 'name', |
|
# '投回', |
|
# '投回sub', |
|
# '打者', |
|
# '安打', |
|
# '四球', |
|
# '死球', |
|
# '三振', |
|
# '自責'], |
|
# ['○', '青柳', '7', '', '26', '3', '2', '0', '5', '0'], |
|
# ['H', '岩崎', '1', '', '3', '0', '0', '0', '2', '0'], |
|
# ['', 'スアレス', '1', '', '3', '0', '0', '0', '1', '0']], |
|
# 'team': '阪\u3000神'}, |
|
# 'date': '2021年4月10日 (土)', |
|
# 'home': {'batter': [['position', 'name', '打数', '安打', '打点', '四球', '死球', '三振'], |
|
# ['(右)', '関根', '4', '1', '0', '0', '0', '2'], |
|
# ['(遊)', '柴田', '3', '0', '0', '0', '0', '0'], |
|
# ['打遊', '大和', '1', '0', '0', '0', '0', '0'], |
|
# ['(一)', '牧', '4', '0', '0', '0', '0', '1'], |
|
# ['(左)', '佐野', '3', '1', '0', '1', '0', '1'], |
|
# ['(三)', '宮﨑', '4', '1', '0', '0', '0', '0'], |
|
# ['(中)', '神里', '3', '0', '0', '0', '0', '1'], |
|
# ['(二)', '田中俊', '2', '0', '0', '1', '0', '0'], |
|
# ['(捕)', '戸柱', '3', '0', '0', '0', '0', '0'], |
|
# ['(投)', '上茶谷', '2', '0', '0', '0', '0', '2'], |
|
# ['投', '砂田', '0', '0', '0', '0', '0', '0'], |
|
# ['打', '桑原', '1', '0', '0', '0', '0', '1'], |
|
# ['投', '平田', '0', '0', '0', '0', '0', '0'], |
|
# ['投', '池谷', '0', '0', '0', '0', '0', '0']], |
|
# 'pitcher': [['status', |
|
# 'name', |
|
# '投回', |
|
# '投回sub', |
|
# '打者', |
|
# '安打', |
|
# '四球', |
|
# '死球', |
|
# '三振', |
|
# '自責'], |
|
# ['●', '上茶谷', '7', '', '26', '5', '0', '0', '5', '1'], |
|
# ['', '砂田', '1', '', '3', '0', '0', '0', '2', '0'], |
|
# ['', '平田', '0', '.1', '4', '2', '1', '0', '1', '3'], |
|
# ['', '池谷', '0', '.2', '4', '1', '0', '0', '1', '0']], |
|
# 'team': '横浜DeNA'}} |