Created August 2, 2014 16:30
-
-
Save wicksome/49bbc8d42a4d1f08fdfd to your computer and use it in GitHub Desktop.
BeautifulSoup로 웹 일부분 추출하기
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: UTF-8 -*-
import codecs

from bs4 import BeautifulSoup
def get_html(path='melon_weekly.html'):
    """Return the contents of a saved HTML file as decoded text.

    Args:
        path: Location of the HTML file to read. Defaults to
            'melon_weekly.html', the file name this function previously
            hard-coded, so existing zero-argument calls behave the same.

    Returns:
        The whole file, decoded from UTF-8, as a single string.

    Raises:
        IOError: If the file cannot be opened (OSError on Python 3).
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    # codecs.open decodes while reading, so the caller always receives
    # text no matter what the interpreter's default encoding is.
    with codecs.open(path, 'r', 'utf-8') as f:
        return f.read()
def extract_chart():
    """Extract two elements of the saved page into their own HTML files.

    Reads the saved page through get_html(), parses it, and writes the
    pretty-printed markup of:

    * the first <div> whose id is 'tb_list' to 'top50.html'
    * the first <tr> whose id is 'lst100' to 'top100.html'

    Judging by the output file names, these presumably hold the chart's
    ranks 1-50 and 51-100 -- confirm against the saved page.

    Raises:
        ValueError: If either element is missing from the page. This is
            checked before any output file is opened, so a failed run
            never leaves an empty or truncated output file behind.
    """
    html = get_html()  # Use the locally saved file.

    # Name the parser explicitly: without it bs4 picks whichever parser
    # happens to be installed (and warns), so the same page could yield
    # different trees on different machines.
    soup = BeautifulSoup(html, 'html.parser')

    # (tag name, element id, output file) for each fragment to extract.
    targets = (
        ('div', 'tb_list', 'top50.html'),
        ('tr', 'lst100', 'top100.html'),
    )

    # Locate every element before writing anything; find() returns None
    # when nothing matches, which would otherwise surface as an opaque
    # AttributeError only after the output file had been truncated.
    # NOTE(review): find() returns only the first match. If the page has
    # several rows sharing id 'lst100', find_all() may be what is wanted.
    found = []
    for tag_name, element_id, out_path in targets:
        element = soup.find(tag_name, id=element_id)
        if element is None:
            raise ValueError(
                "no <%s id=\"%s\"> element found in the saved page"
                % (tag_name, element_id)
            )
        found.append((element, out_path))

    for element, out_path in found:
        with codecs.open(out_path, 'w', 'utf-8') as f:
            f.write(element.prettify())
# Run the extraction only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    extract_chart()
Sign up for free to join this conversation on GitHub.
Already have an account? Sign in to comment.