# Source: GitHub gist madscientist01/5f3dfdb8f4de57b04e69e0236c1c9de8
# Author: @madscientist01
# Created: October 12, 2018 09:38
import requests
import pandas as pd
import datetime
from bs4 import BeautifulSoup
import os, glob
from functools import reduce
def get_html(url, timeout=10):
    """Fetch *url* with an HTTP GET and return the response body as text.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before ``requests`` gives
            up and raises. Without a timeout a stalled server would block
            the script indefinitely.

    Returns:
        The response text when the status code is 200, otherwise an empty
        string.

    Exceptions raised by ``requests.get`` (connection failure, timeout)
    are not caught here and propagate to the caller.
    """
    resp = requests.get(url, timeout=timeout)
    if resp.status_code == 200:
        return resp.text
    # Any non-200 response is reported as an empty document, not an error.
    return ""
# Switch to the data directory first so that today's snapshot is written
# next to the earlier ones and is picked up by the merge further down.
os.chdir("/Users/suknamgoong/Dropbox")

# Fetch today's page and parse it.
html = get_html('http://mobile.kyobobook.co.kr/showcase/book/siteinfo/KOR/9791196283155?orderClick=Ol5')
soup = BeautifulSoup(html, 'html.parser')
tags = soup.find_all('strong')
name = [tag.get_text() for tag in tags]
tags2 = soup.find_all('a', {'class': 'cont'})
num = [int(tag.get_text()) for tag in tags2]
today = datetime.date.today()
df = pd.DataFrame({'name': name, today: num})
if df.empty:
    # get_html returns "" when the request does not succeed, which parses
    # to zero rows. Saving that empty snapshot would make the inner-join
    # merge below produce an empty current.csv, so skip it.
    print('No data parsed for ' + str(today) + '; snapshot not saved.')
else:
    df.to_csv(str(today) + '.csv')

# Load the CSV snapshots saved so far, merge them into one table and save
# the result as current.csv.
# NOTE(review): the pattern only matches file names starting with "2018",
# so snapshots from other years are not merged; confirm this is intended.
filelist = sorted(glob.glob('2018*.csv'))
datalist = [pd.read_csv(path) for path in filelist]
if datalist:
    # pd.merge without `on` joins on every column the two frames share.
    # NOTE(review): presumably that includes the unnamed index column that
    # to_csv writes by default; verify against the saved files.
    newdata = reduce(lambda left, right: pd.merge(left, right), datalist)
    newdata.to_csv('current.csv')
# (GitHub page footer: sign up or sign in on GitHub to comment on this gist.)