Last active
October 30, 2018 13:21
-
-
Save wakusei-meron-/c269e54e542ee9cc20dc0c5ad2eaf40b to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Scrape the index component page at `url` and store every listed company
# in the PostgreSQL table `company` (columns: code, name, traiding_name).
#
# Flow: open the DB connection, download the page, walk the component
# containers, INSERT one row per component, then commit. The cursor and the
# connection are always released, even when fetching or parsing fails.
import urllib3
from bs4 import BeautifulSoup

# Prepare the database.
import psycopg2

conn = psycopg2.connect("user=USER_NAME dbname=dev host=127.0.0.1")
cur = conn.cursor()

try:
    # URL
    url = "https://indexes.nikkei.co.jp/nkave/index/component?idx=nk225#C27"

    # Create the HTTP client.
    mgr = urllib3.PoolManager()

    # Send the request.
    r = mgr.request("GET", url)
    if r.status != 200:
        # Do not parse an error page as if it were the component list.
        raise RuntimeError(f"GET {url} returned HTTP {r.status}")

    # Parse the HTML.
    body = r.data.decode("utf-8")
    soup = BeautifulSoup(body, "html.parser")

    # Extract the required information.
    # Search the containers directly from the document root: looping over
    # every <div> and searching its descendants would visit the same
    # container once per ancestor <div> and insert duplicate rows.
    for rdiv in soup.find_all("div", class_="col-xs-12 col-sm-8"):
        for rdiv_div in rdiv.find_all("div"):
            # A <div> without a class attribute yields None; treat it as
            # "no classes" so the membership tests below cannot raise.
            rclasses = rdiv_div.get("class") or []

            if "component-category" in rclasses:
                # The first nested <div> holds the category heading text.
                print(rdiv_div.find_all("div")[0].string)

            if "component-list" in rclasses:
                # Nested <div>s are read positionally:
                # [0] code, [1] trading name, [2] company name.
                list_divs = rdiv_div.find_all("div")
                code = list_divs[0].string
                trading_name = list_divs[1].string
                name = list_divs[2].string
                # NOTE(review): the column is spelled "traiding_name" in the
                # statement; presumably it matches the table schema, so the
                # SQL text is left unchanged.
                cur.execute(
                    "INSERT INTO company (code, name, traiding_name) VALUES (%s, %s, %s)",
                    (code, name, trading_name),
                )
                print(code, trading_name, name)

    # Read the table back; the fetched rows are not used further.
    cur.execute("SELECT * FROM company;")
    cur.fetchall()

    # Persist all inserts in one transaction.
    conn.commit()
finally:
    # Closing without a prior commit discards the pending inserts, so a
    # failure part-way through leaves the table unchanged.
    cur.close()
    conn.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment