oiehot · November 4, 2017 04:38
diff --git a/naver_cafe_crawler_wip.py b/naver_cafe_crawler_wip.py
 from selenium import webdriver

 class Article():
 	"""One article row taken from a cafe search result listing.

 	Holds the article id, title, author, date, view/like counters and the
 	article body text. The body is stored but is not part of ``repr``.
 	"""

 	def __init__(self, id, title, author, date, view_count=0, like_count=0, contents=''):
 		"""Store the given fields; counters default to 0 and the body to ''."""
 		self.id, self.title = id, title
 		self.author, self.date = author, date
 		self.view_count, self.like_count = view_count, like_count
 		self.contents = contents

 	def __repr__(self):
 		"""Return a constructor-like summary of every field except ``contents``."""
 		template = "Article(id='%s', title='%s', author='%s', date='%s', view_count=%d, like_count=%d)"
 		values = (self.id, self.title, self.author, self.date, self.view_count, self.like_count)
 		return template % values

 class Naver():
 	"""Small Selenium helper that logs in to Naver and searches a Naver cafe.

 	The WebDriver is supplied by the caller; this class never closes it.
 	"""

 	def __init__(self, driver):
 		"""Keep a reference to the Selenium WebDriver used for every page action."""
 		self.driver = driver

 	@staticmethod
 	def _parse_count(text):
 		"""Convert the text of a counter cell to an int.

 		Surrounding whitespace and thousands separators (e.g. '1,234') are
 		ignored and an empty cell counts as 0. Any other non-numeric text
 		still raises ValueError.
 		"""
 		digits = text.strip().replace(',', '')
 		return int(digits) if digits else 0

 	@staticmethod
 	def _parse_author(text):
 		"""Extract the author id from the text of the author cell.

 		The cell text is expected to be a label line followed by the id line;
 		the second line is returned. When only a single line is present it is
 		returned as-is instead of raising IndexError.
 		"""
 		lines = text.strip().split('\n')
 		return lines[1] if len(lines) > 1 else lines[0]

 	def login(self, id, pw):
 		"""Log in to Naver by filling in and submitting the login form.

 		Args:
 			id: Account id typed into the 'id' field.
 			pw: Password typed into the 'pw' field.
 		"""
 		self.driver.get('https://nid.naver.com/nidlogin.login') # Login page.
 		self.driver.find_element_by_name('id').send_keys(id) # Type the id.
 		self.driver.find_element_by_name('pw').send_keys(pw) # Type the password.
 		self.driver.find_element_by_xpath('//*[@id="frmNIDLogin"]/fieldset/input').click() # Click the login button.

 	def cafe_search(self, name, keyword):
 		"""Search a cafe for a keyword and return the listed articles.

 		Only the rows present on the first loaded result page are read.

 		Args:
 			name: Cafe name, used as the path of the cafe URL.
 			keyword: Search term typed into the cafe search box.

 		Returns:
 			A list of Article objects, one per matched result row.

 		Raises:
 			ValueError: If a view/like counter cell holds non-numeric text.
 		"""
 		# TODO: search every result page
 		# TODO: crawl the article bodies
 		# NOTE(review): the find_element_by_* helpers are legacy Selenium API;
 		# they need migrating to find_element(By..., ...) before upgrading Selenium.
 		driver = self.driver

 		driver.get('http://cafe.naver.com/%s' % name) # Cafe main page.
 		driver.find_element_by_xpath('//*[@id="topLayerQueryInput"]').send_keys(keyword) # Type the search term.
 		driver.find_element_by_xpath('//*[@id="cafe-search"]/form/a').click() # Click the search button.

 		# The results live inside an iframe, so move the driver context into it.
 		iframe = driver.find_element_by_xpath('//*[@id="cafe_main"]')
 		driver.switch_to.frame(iframe)
 		try:
 			# Select the article rows (every third <tr>, starting with the first).
 			rows = driver.find_elements_by_css_selector('#main-area > div:nth-child(8) > form > table > tbody > tr:nth-of-type(3n+1)')

 			articles = []
 			for row in rows:
 				article_id = row.find_element_by_xpath('td[1]/span').text.strip()
 				title = row.find_element_by_xpath('td[2]/span').text.strip()
 				author = self._parse_author(row.find_element_by_xpath('td[3]').text)
 				date = row.find_element_by_xpath('td[4]').text.strip() # e.g. '12:27', '2017.11.03' # TODO: normalize
 				view_count = self._parse_count(row.find_element_by_xpath('td[5]').text)
 				like_count = self._parse_count(row.find_element_by_xpath('td[6]').text)
 				articles.append(Article(article_id, title, author, date, view_count, like_count))
 			return articles
 		finally:
 			# Always leave the iframe, even when a row fails to parse, so the
 			# driver is not left stuck inside the frame for later calls.
 			driver.switch_to.default_content()

 if __name__ == '__main__':
 	# Script entry point: log in, search one cafe for one keyword and print
 	# every article found.
 	# driver = webdriver.PhantomJS('d:/project/a/bin/phantomjs/phantomjs.exe') # http://phantomjs.org/download.html
 	chrome = webdriver.Chrome('d:/project/a/bin/chrome_driver/chromedriver.exe') # https://sites.google.com/a/chromium.org/chromedriver/downloads
 	# driver.implicitly_wait(3) # Set the implicit wait time.
 	try:
 		naver = Naver(driver=chrome)
 		naver.login(id='{ID}', pw='{PASSWORD}')
 		articles = naver.cafe_search(name='joonggonara', keyword='라이젠')
 		for article in articles:
 			print(article)
 	finally:
 		# Shut the browser and the chromedriver process down even when the
 		# login or search fails, so they are not left running.
 		chrome.quit()
	from selenium import webdriver

	class Article():
		"""One article row taken from a cafe search result listing.

		Holds the article id, title, author, date, view/like counters and the
		article body text. The body is stored but is not part of ``repr``.
		"""

		def __init__(self, id, title, author, date, view_count=0, like_count=0, contents=''):
			"""Store the given fields; counters default to 0 and the body to ''."""
			self.id = id
			self.title = title
			self.author = author
			self.date = date
			self.view_count = view_count
			self.like_count = like_count
			self.contents = contents

		def __repr__(self):
			"""Return a constructor-like summary of every field except ``contents``."""
			return "Article(id='%s', title='%s', author='%s', date='%s', view_count=%d, like_count=%d)" % (self.id, self.title, self.author, self.date, self.view_count, self.like_count)

	class Naver():
		"""Small Selenium helper that logs in to Naver and searches a Naver cafe.

		The WebDriver is supplied by the caller; this class never closes it.
		"""

		def __init__(self, driver):
			"""Keep a reference to the Selenium WebDriver used for every page action."""
			self.driver = driver

		@staticmethod
		def _parse_count(text):
			"""Convert the text of a counter cell to an int.

			Surrounding whitespace and thousands separators (e.g. '1,234') are
			ignored and an empty cell counts as 0. Any other non-numeric text
			still raises ValueError.
			"""
			digits = text.strip().replace(',', '')
			return int(digits) if digits else 0

		@staticmethod
		def _parse_author(text):
			"""Extract the author id from the text of the author cell.

			The cell text is expected to be a label line followed by the id line;
			the second line is returned. When only a single line is present it is
			returned as-is instead of raising IndexError.
			"""
			lines = text.strip().split('\n')
			return lines[1] if len(lines) > 1 else lines[0]

		def login(self, id, pw):
			"""Log in to Naver by filling in and submitting the login form.

			Args:
				id: Account id typed into the 'id' field.
				pw: Password typed into the 'pw' field.
			"""
			self.driver.get('https://nid.naver.com/nidlogin.login') # Login page.
			self.driver.find_element_by_name('id').send_keys(id) # Type the id.
			self.driver.find_element_by_name('pw').send_keys(pw) # Type the password.
			self.driver.find_element_by_xpath('//*[@id="frmNIDLogin"]/fieldset/input').click() # Click the login button.

		def cafe_search(self, name, keyword):
			"""Search a cafe for a keyword and return the listed articles.

			Only the rows present on the first loaded result page are read.

			Args:
				name: Cafe name, used as the path of the cafe URL.
				keyword: Search term typed into the cafe search box.

			Returns:
				A list of Article objects, one per matched result row.

			Raises:
				ValueError: If a view/like counter cell holds non-numeric text.
			"""
			# TODO: search every result page
			# TODO: crawl the article bodies
			# NOTE(review): the find_element_by_* helpers are legacy Selenium API;
			# they need migrating to find_element(By..., ...) before upgrading Selenium.
			driver = self.driver

			driver.get('http://cafe.naver.com/%s' % name) # Cafe main page.
			driver.find_element_by_xpath('//*[@id="topLayerQueryInput"]').send_keys(keyword) # Type the search term.
			driver.find_element_by_xpath('//*[@id="cafe-search"]/form/a').click() # Click the search button.

			# The results live inside an iframe, so move the driver context into it.
			iframe = driver.find_element_by_xpath('//*[@id="cafe_main"]')
			driver.switch_to.frame(iframe)
			try:
				# Select the article rows (every third <tr>, starting with the first).
				rows = driver.find_elements_by_css_selector('#main-area > div:nth-child(8) > form > table > tbody > tr:nth-of-type(3n+1)')

				articles = []
				for row in rows:
					article_id = row.find_element_by_xpath('td[1]/span').text.strip()
					title = row.find_element_by_xpath('td[2]/span').text.strip()
					author = self._parse_author(row.find_element_by_xpath('td[3]').text)
					date = row.find_element_by_xpath('td[4]').text.strip() # e.g. '12:27', '2017.11.03' # TODO: normalize
					view_count = self._parse_count(row.find_element_by_xpath('td[5]').text)
					like_count = self._parse_count(row.find_element_by_xpath('td[6]').text)
					articles.append(Article(article_id, title, author, date, view_count, like_count))
				return articles
			finally:
				# Always leave the iframe, even when a row fails to parse, so the
				# driver is not left stuck inside the frame for later calls.
				driver.switch_to.default_content()

	if __name__ == '__main__':
		# Script entry point: log in, search one cafe for one keyword and print
		# every article found.
		# driver = webdriver.PhantomJS('d:/project/a/bin/phantomjs/phantomjs.exe') # http://phantomjs.org/download.html
		chrome = webdriver.Chrome('d:/project/a/bin/chrome_driver/chromedriver.exe') # https://sites.google.com/a/chromium.org/chromedriver/downloads
		# driver.implicitly_wait(3) # Set the implicit wait time.
		try:
			naver = Naver(driver=chrome)
			naver.login(id='{ID}', pw='{PASSWORD}')
			articles = naver.cafe_search(name='joonggonara', keyword='라이젠')
			for article in articles:
				print(article)
		finally:
			# Shut the browser and the chromedriver process down even when the
			# login or search fails, so they are not left running.
			chrome.quit()