Last active
November 1, 2019 15:26
-
-
Save yteraoka/b217dae177bc6f360830da72f364b143 to your computer and use it in GitHub Desktop.
Wikipedia から今日が何の日かを抽出
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random | |
import requests | |
from bs4 import BeautifulSoup | |
from datetime import date | |
# id attribute of the "events" (できごと) section heading on a ja.wikipedia date page.
EVENTS_HEADING_ID = "できごと"

# Maximum number of events printed per run.
SAMPLE_SIZE = 3

# Seconds to wait for Wikipedia before giving up instead of hanging forever.
REQUEST_TIMEOUT = 10


def extract_events(html, heading_id=EVENTS_HEADING_ID):
    """Return the text of each <li> in the first <ul> after the heading.

    Args:
        html: Markup of a Japanese Wikipedia date page.
        heading_id: ``id`` attribute of the element that marks the section.

    Returns:
        A list of strings, one per ``<li>`` found in that ``<ul>``.  The list
        is empty when the heading, or a ``<ul>`` after it, cannot be found.
    """
    soup = BeautifulSoup(html, 'lxml')
    # Match on the id alone so the lookup does not depend on which tag
    # (<span>, <h2>, ...) happens to carry it.
    heading = soup.find(id=heading_id)
    if heading is None:
        return []
    # Advance to the first <ul> that appears after the heading.
    event_list = heading.find_next("ul")
    if event_list is None:
        return []
    # Collect the text of every <li> inside that <ul>.
    return [li.text for li in event_list.find_all("li")]


def pick_events(events, count=SAMPLE_SIZE):
    """Return up to ``count`` randomly chosen events, sorted as strings.

    Args:
        events: List of event strings to choose from.
        count: Upper bound on how many events to return.

    Returns:
        A new sorted list holding ``min(count, len(events))`` items; an empty
        input yields an empty list.
    """
    return sorted(random.sample(events, min(count, len(events))))


def main():
    """Fetch today's date page from ja.wikipedia.org and print a few events.

    Raises:
        requests.HTTPError: If Wikipedia answers with an error status.
    """
    today = date.today()
    url = 'https://ja.wikipedia.org/wiki/{}月{}日'.format(today.month, today.day)
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    # Fail loudly on 4xx/5xx rather than parsing an error page.
    response.raise_for_status()
    for event in pick_events(extract_events(response.text)):
        print(event)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment