Created
October 12, 2018 09:38
-
-
Save madscientist01/8235032017c2653140154a385a56cdbb to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import pandas as pd | |
import datetime | |
from bs4 import BeautifulSoup | |
import os, glob | |
from functools import reduce | |
def get_html(url, timeout=10):
    """Fetch *url* with an HTTP GET and return the response body as text.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout, ``requests.get`` can block indefinitely on a
            stalled connection.

    Returns:
        The response text when the server answers with status 200,
        otherwise an empty string.

    Raises:
        requests.exceptions.RequestException: Propagated from ``requests``
            when the connection fails or the timeout expires.
    """
    resp = requests.get(url, timeout=timeout)
    if resp.status_code == 200:
        return resp.text
    # Any other status is reported to the caller as "no content".
    return ""
# Switch to the data directory first, so that today's snapshot is written
# next to the earlier daily files that are globbed and merged below.
os.chdir("/Users/suknamgoong/Dropbox")

# Fetch today's data and parse it.
html = get_html('http://mobile.kyobobook.co.kr/showcase/book/siteinfo/KOR/9791196283155?orderClick=Ol5')
soup = BeautifulSoup(html, 'html.parser')
# Labels are taken from every <strong> tag, numbers from every <a class="cont">.
tags = soup.find_all('strong')
name = [tag.get_text() for tag in tags]
tags2 = soup.find_all('a', {'class': 'cont'})
num = [int(tag.get_text()) for tag in tags2]

# get_html returns "" on a non-200 response, which yields empty lists here.
# Stop before writing, so an empty daily file never reaches the merge below.
if not name or not num:
    raise SystemExit('No data could be parsed from the page; nothing was saved.')

today = datetime.date.today()
# One column of labels plus one column of numbers keyed by today's date.
df = pd.DataFrame({'name': name, today: num})
df.to_csv(str(today) + '.csv')

# Load the CSV files saved so far, merge them into one table, and save the
# result as current.csv.
# NOTE(review): the pattern only matches file names starting with "2018", so
# snapshots dated in later years would not be merged - confirm the intent.
filelist = glob.glob('2018*.csv')
filelist.sort()
datalist = [pd.read_csv(file) for file in filelist]
# pd.merge without explicit keys joins on the columns both frames share.
newdata = reduce(lambda left, right: pd.merge(left, right), datalist)
newdata.to_csv('current.csv')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment