Skip to content

Instantly share code, notes, and snippets.

@allieus
Created March 25, 2017 12:17
Show Gist options
  • Save allieus/9d12ee13f0ae9dba9ce3d8740ed23576 to your computer and use it in GitHub Desktop.
Save allieus/9d12ee13f0ae9dba9ce3d8740ed23576 to your computer and use it in GitHub Desktop.
국회의원 목록 크롤링 (python2) 테스트 코드
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import sys
import re
import requests
from bs4 import BeautifulSoup
import time
# Crawl the National Assembly member list and print "<name> - <member id>"
# for every member link found, followed by the interpreter version.

# Ajax listing endpoint; the query string requests page 1 with up to 1000 rows
# (presumably enough to return every member in a single response).
url = "http://www.assembly.go.kr/assm/memact/congressman/memCond/memCondListAjax.do?currentPage=1&rowPerPage=1000"

# Bound the wait so an unresponsive server cannot hang the script forever, and
# fail loudly on an HTTP error instead of silently parsing an error page.
resp = requests.get(url, timeout=30)
resp.raise_for_status()

# Force UTF-8 decoding of the body instead of relying on the detected charset.
resp.encoding = 'utf8'
html = resp.text

# Remove short (3-4 char) lowercase-hex tokens surrounded by whitespace.
# NOTE(review): presumably stray chunk-size markers leaking into the body;
# confirm against a live response, since this also strips legitimate text
# such as a 4-digit number standing alone between whitespace.
html = re.sub(r'\s{1,2}[0-9a-f]{3,4}\s{1,2}', '', html)
soup = BeautifulSoup(html, "html.parser")

for member_tag in soup.select('.memberna_list dl dt a'):
    name = member_tag.text
    # Anchors without an href yield an empty link (and thus a None id)
    # rather than aborting the whole crawl with a KeyError.
    link = member_tag.get('href', '')
    # The member id is taken to be the first run of digits in the link.
    matched = re.search(r'\d+', link)
    member_id = matched.group(0) if matched else None
    print('{} - {}'.format(name, member_id))

print(sys.version)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment