Skip to content

Instantly share code, notes, and snippets.

@iidx
Created April 1, 2016 08:20
Show Gist options
  • Save iidx/3e815bf515b26c320f430d4aa5e9f355 to your computer and use it in GitHub Desktop.
Save iidx/3e815bf515b26c320f430d4aa5e9f355 to your computer and use it in GitHub Desktop.
getharim.py
# -*- coding: utf-8 -*-
import urllib2 as u
import re
def GetDataFromURL(url):
    """Fetch ``url`` and return the raw response body.

    Args:
        url: Address to request; handed unchanged to ``u.Request``.

    Returns:
        Whatever ``read()`` on the opened response yields (the whole body).

    Raises:
        Any exception raised by ``u.urlopen`` or by ``read()`` propagates
        to the caller unchanged.
    """
    req = u.Request(url)
    response = u.urlopen(req)
    try:
        return response.read()
    finally:
        # Always release the underlying connection, including when read()
        # raises, so repeated calls do not accumulate open sockets.
        response.close()
def GetHarim():
    """Scrape the first ``bbs_menu01`` post on pvv.co.kr for dated entries.

    Works in two steps:
      1. Download the board index and take the id of the first link that
         matches ``bbsView.php?id=<digits>&page=1&code=bbs_menu01``.
      2. Download that post's body page and collect every
         ``NN/NN<text><br />`` occurrence.

    Returns:
        A list of ``(date, text)`` 2-tuples as produced by ``re.findall``
        (``date`` is the ``NN/NN`` part - presumably month/day, TODO
        confirm), or ``None`` after printing a diagnostic when either
        step fails.
    """
    harim_regex = r"<a\shref=\"bbsView\.php\?id=([0-9]+)&page=1&code=bbs_menu01\">"
    base_url = "http://www.pvv.co.kr/bbs/index.php?code=bbs_menu01"
    base_error = "[!] error in GetHarim() - base data parse"

    # Step 1: locate the id of the first post listed on the index page.
    try:
        stream = GetDataFromURL(base_url)
        match = re.search(harim_regex, stream)
    except Exception:
        # Deliberately not a bare ``except:`` so that KeyboardInterrupt and
        # SystemExit still reach the caller instead of being reported as a
        # parse error.
        print(base_error)
        return None
    if match is None:
        # No matching link on the index page; report it explicitly rather
        # than relying on an AttributeError from ``None.group``.
        print(base_error)
        return None
    bbs_id = match.group(1)

    # Step 2: fetch the post body and extract the dated lines.
    try:
        bbs_url = "http://www.pvv.co.kr/bbs/bbsView_body.php?id={}&code=bbs_menu01".format(bbs_id)
        bbs_regex = r"(\d{2}/\d{2})(.+)\s?<br\s/>"
        stream = GetDataFromURL(bbs_url)
        bbs_data = re.findall(bbs_regex, stream)
        return bbs_data
    except Exception as e:
        print(e)
        print("[!] error in GetHarim() - bbs data parse")
        return None
@iidx
Copy link
Author

iidx commented Apr 1, 2016

하림 진짜 싫다...

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment