Skip to content

Instantly share code, notes, and snippets.

@wicksome
Created August 2, 2014 16:30
Show Gist options
  • Save wicksome/49bbc8d42a4d1f08fdfd to your computer and use it in GitHub Desktop.
Save wicksome/49bbc8d42a4d1f08fdfd to your computer and use it in GitHub Desktop.
BeautifulSoup로 웹 일부분 추출하기
# -*- coding: UTF-8 -*-
from bs4 import BeautifulSoup
import codecs
def get_html(path='melon_weekly.html'):
    """Return the contents of a saved HTML file as text.

    Args:
        path: File to read. Defaults to 'melon_weekly.html' in the current
            working directory, which is the file this script was written
            against.

    Returns:
        The whole file decoded as UTF-8.

    Raises:
        IOError/OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # codecs.open decodes while reading, so the caller gets text, not bytes.
    with codecs.open(path, 'r', 'utf-8') as f:
        return f.read()
def extract_chart():
    """Copy two fragments of the saved HTML page into their own files.

    Reads the page through get_html(), parses it with BeautifulSoup, and
    writes the pretty-printed markup of:

      * the first <div id="tb_list"> to 'top50.html'
      * the first <tr id="lst100">  to 'top100.html'

    Both output files are written as UTF-8 in the current working directory.

    Raises:
        ValueError: If either element is missing from the page. Nothing is
            written in that case, so no empty or partial output is left
            behind.
    """
    html = get_html()  # Use the locally saved file rather than fetching it.

    # Name the parser explicitly: without it bs4 picks whichever parser
    # happens to be installed (and warns), so the same input can produce
    # different trees on different machines.
    soup = BeautifulSoup(html, 'html.parser')

    # find() returns only the FIRST matching element, or None if there is
    # no match.
    # NOTE(review): if the page repeats id="lst100" on several rows, only the
    # first row is extracted here -- confirm that is the intent.
    fragments = (
        ('top50.html', 'div#tb_list', soup.find('div', id='tb_list')),
        ('top100.html', 'tr#lst100', soup.find('tr', id='lst100')),
    )

    # Validate every lookup before opening any output file, so a missing
    # element cannot leave a truncated file on disk.
    missing = [selector for _, selector, element in fragments
               if element is None]
    if missing:
        raise ValueError(
            'element(s) not found in HTML: ' + ', '.join(missing))

    for filename, _, element in fragments:
        with codecs.open(filename, 'w', 'utf-8') as f:
            f.write(element.prettify())
# Run the extraction only when executed as a script, not when imported.
if __name__ == '__main__':
    extract_chart()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment