Created
May 6, 2017 14:44
-
-
Save naturale0/ce8fa27f0c8b28cc0c5a63f98a66beab to your computer and use it in GitHub Desktop.
네이버 금융에서 일별 종가를 읽어오는 클래스 (http://estenpark.tistory.com/353 참고 - 이걸 그대로 클래스로 구현함)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## http://estenpark.tistory.com/353 참고 (거의 그대로 가져옴) | |
import urllib | |
import time | |
import sys | |
from urllib import urlopen | |
from bs4 import BeautifulSoup | |
class NaverStockCrawler(object):
    """Read daily closing prices for one stock from Naver Finance.

    Implements, as a class, the approach described at
    http://estenpark.tistory.com/353.

    Attributes set by ``__init__``:
        stock_item: the stock code string appended to the listing URL.
        url: the daily-price listing URL for ``stock_item``.
        items: ordered mapping of date text -> price (float), filled by
            ``get``. Entries accumulate across calls; a date seen again
            overwrites its previous value.
    """

    BASE_URL = 'http://finance.naver.com/item/sise_day.nhn?code='
    # ``get`` converts a number of days into a number of pages by assuming
    # this many data rows per listing page.
    ROWS_PER_PAGE = 10
    # Pause (seconds) after every page request.
    REQUEST_DELAY = 1.40

    def __init__(self, stock_item):
        """Remember the stock code and build its listing URL.

        Args:
            stock_item: stock code as a string; it is concatenated onto the
                listing URL as-is.
        """
        # Imported here so the class does not depend on a module-level
        # import that the file does not have.
        from collections import OrderedDict

        self.stock_item = stock_item
        self.url = self.BASE_URL + stock_item
        self.items = OrderedDict()

    def get(self, days=40):
        """Crawl enough listing pages to cover ``days`` rows of prices.

        Whole pages are collected, so when ``days`` is not a multiple of
        ``ROWS_PER_PAGE`` more than ``days`` entries may be stored.

        Args:
            days: number of daily rows wanted. Zero or a negative value
                crawls nothing and performs no network request.

        Returns:
            ``self.items``, the ordered mapping of date text -> price.
        """
        # Ceiling division: a partially needed page is still fetched.
        pages = (days + self.ROWS_PER_PAGE - 1) // self.ROWS_PER_PAGE
        if pages <= 0:
            return self.items

        # The pager on the first page links to the last page; use it to
        # avoid asking for pages past the end. If it cannot be read, fall
        # back to the page count derived from ``days``.
        last_page = self._last_page(self._fetch(self.url))
        if last_page is not None:
            pages = min(pages, last_page)

        for page in range(1, pages + 1):
            sys.stdout.write("\r* crawling page " + str(page) + "...")
            sys.stdout.flush()
            source = self._fetch(self.url + '&page=' + str(page))
            time.sleep(self.REQUEST_DELAY)
            self._collect_rows(source)
        return self.items

    def _fetch(self, url):
        """Download ``url`` and return it parsed with ``html.parser``."""
        response = urlopen(url)
        try:
            return BeautifulSoup(response.read(), "html.parser")
        finally:
            # Always release the connection, even if reading/parsing fails.
            response.close()

    def _last_page(self, source):
        """Return the page number linked from the ``pgRR`` pager cell.

        Returns None when no such cell/link exists or its href carries no
        usable page number.
        """
        for table in source.find_all("table", align="center"):
            for cell in table.find_all("td", class_="pgRR"):
                link = cell.a
                if link is None:
                    continue
                page = self._page_from_href(link.get('href'))
                if page is not None:
                    return page
        return None

    @staticmethod
    def _page_from_href(href):
        """Extract the integer after the last ``page=`` in ``href``.

        Works for any number of digits. Returns None if ``href`` is empty,
        has no ``page=`` part, or the value is not a positive integer.
        """
        if not href:
            return None
        _, marker, tail = href.rpartition('page=')
        if not marker:
            return None
        end = 0
        while end < len(tail) and tail[end].isdigit():
            end += 1
        if end == 0:
            return None
        page = int(tail[:end])
        return page if page >= 1 else None

    @staticmethod
    def _to_price(text):
        """Convert price text with thousands separators to a float."""
        return float(text.replace(",", ""))

    def _collect_rows(self, source):
        """Store the date -> price pair of every data row in ``source``.

        The first and last ``<tr>`` are skipped, as are rows without a
        ``<span>`` (those are treated as non-data rows).
        """
        for row in source.find_all("tr")[1:-1]:
            if row.span is None:
                continue
            date = row.find_all("td", align="center")[0].text.strip()
            price = row.find_all("td", class_="num")[0].text
            self.items[date] = self._to_price(price)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment