Created
October 12, 2018 09:38
-
-
Save madscientist01/5f3dfdb8f4de57b04e69e0236c1c9de8 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import pandas as pd | |
import datetime | |
from bs4 import BeautifulSoup | |
import os, glob | |
from functools import reduce | |
def get_html(url, timeout=30):
    """Fetch *url* with an HTTP GET and return the response body as text.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. Passed
            straight to ``requests.get``; without it the call can block
            forever on an unresponsive server.

    Returns:
        The response text when the status code is 200, otherwise an empty
        string.

    Raises:
        Any exception raised by ``requests.get`` (connection errors,
        timeouts, ...) propagates to the caller unchanged.
    """
    resp = requests.get(url, timeout=timeout)
    # Anything other than a plain 200 is reported as "no content".
    if resp.status_code != 200:
        return ""
    return resp.text
# Fetch today's page and parse it.
html = get_html('http://mobile.kyobobook.co.kr/showcase/book/siteinfo/KOR/9791196283155?orderClick=Ol5')
soup = BeautifulSoup(html, 'html.parser')

# Text of every <strong> element becomes the 'name' column.
# NOTE(review): presumably one entry per store/branch -- confirm against the page.
strong_tags = soup.find_all('strong')
name = [element.get_text() for element in strong_tags]

# Text of every <a class="cont"> element, converted to int, becomes today's column.
# NOTE(review): presumably a per-store count -- confirm against the page.
count_links = soup.find_all('a', {'class': 'cont'})
num = [int(element.get_text()) for element in count_links]

# One column of names plus one column keyed by today's date.
today = datetime.date.today()
df = pd.DataFrame({'name': name, today: num})

# Total of today's column; the value is neither stored nor printed here.
df[today].sum()

# Saved as "<YYYY-MM-DD>.csv" relative to the current working directory
# (this happens before the chdir below).
df.to_csv(today.isoformat() + '.csv')

# Load the CSV files saved so far, merge them into one table, and save the
# result as current.csv.
os.chdir("/Users/suknamgoong/Dropbox")
csv_paths = sorted(glob.glob('2018*.csv'))
daily_frames = list(map(pd.read_csv, csv_paths))
# pd.merge with no explicit keys joins on the columns both frames share.
newdata = reduce(pd.merge, daily_frames)
newdata.to_csv('current.csv')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment