Skip to content

Instantly share code, notes, and snippets.

@lu911
Last active December 21, 2015 01:29
Show Gist options
  • Save lu911/6228426 to your computer and use it in GitHub Desktop.
Save lu911/6228426 to your computer and use it in GitHub Desktop.
Naver movie review (reply) crawler
import re, urllib, httplib, time
class Mining(object):
    """Crawl viewer comments for one movie on movie.naver.com.

    Creating an instance runs the crawl immediately: for every requested
    page it downloads the comment list once ordered by 'highest' and once
    ordered by 'lowest', collects the comment texts, prints the combined
    result and keeps it on ``self.data``.

    Attributes:
        data: dict with the keys u'positive' (comments from the 'highest'
            ordering) and u'negative' (comments from the 'lowest'
            ordering); each value is a list of raw comment strings.
    """

    # Captures the text of the <p> that directly follows each
    # class="score_reple" marker in the downloaded HTML.
    r = re.compile(r'class="score_reple">\s*<p>(.*?)</p>')

    def __init__(self, code, pages=1, delay=2):
        """Fetch, store and print the comments for movie *code*.

        Args:
            code: movie identifier placed in the ``code`` query parameter.
            pages: number of result pages to fetch per ordering, starting
                at page 1. The default of 1 matches the original
                behavior of fetching only the first page.
            delay: seconds to sleep after every request, so the site is
                not hit in rapid succession.
        """
        positive = []
        negative = []
        for page in range(1, pages + 1):
            positive.extend(self.get_data(code, 'highest', page))
            time.sleep(delay)
            negative.extend(self.get_data(code, 'lowest', page))
            time.sleep(delay)
        data = {u'positive': positive, u'negative': negative}
        # Keep the result on the instance so callers can use it after
        # construction instead of only seeing the printed output.
        self.data = data
        print(data)

    def get_data(self, code, type, page):
        """Download one comment page and return the comment texts on it.

        Args:
            code: movie identifier for the ``code`` query parameter.
            type: value for the ``order`` query parameter ('highest' or
                'lowest' as used by ``__init__``). The name shadows the
                builtin but is kept so keyword callers keep working.
            page: 1-based page number for the ``page`` query parameter.

        Returns:
            list of the strings captured by ``self.r`` in the response
            body, in document order (empty if nothing matched).
        """
        url = 'http://movie.naver.com/movie/bi/mi/pointWriteFormList.nhn?code=%s&type=after&order=%s&page=%s' % (code, type, page)
        response = urllib.urlopen(url)
        try:
            html = response.read()
        finally:
            # Always release the connection, even if reading fails.
            response.close()
        return self.r.findall(html)
# Runs the crawl for movie code 62328 right away (at execution or import
# time): the constructor performs the HTTP requests and prints the result.
m = Mining(62328)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment