Skip to content

Instantly share code, notes, and snippets.

@dongkwan-kim
Created November 23, 2017 09:19
Show Gist options
  • Save dongkwan-kim/699edab9b6b7575cadf2fdf7aef3b4c7 to your computer and use it in GitHub Desktop.
Save dongkwan-kim/699edab9b6b7575cadf2fdf7aef3b4c7 to your computer and use it in GitHub Desktop.
from selenium import webdriver
from time import sleep
import csv
def run_driver(path):
    """Start a Chrome WebDriver session and return it.

    ``path`` is passed to ``webdriver.Chrome`` as its first positional
    argument.  The returned driver has a 3-second implicit wait configured.
    """
    chrome = webdriver.Chrome(path)
    chrome.implicitly_wait(3)
    return chrome
def get_site(driver, url):
    """Navigate ``driver`` to ``url`` by calling ``driver.get``.

    Nothing is returned; the result of ``driver.get`` is discarded.
    """
    driver.get(url)
def click_prev(driver):
    """Click the element whose class name is ``bx-prev``."""
    prev_button = driver.find_element_by_class_name("bx-prev")
    prev_button.click()
def get_current_month(driver):
    """Return the text of the element matching ``.month-list.active``."""
    return driver.find_element_by_css_selector(".month-list.active").text
def get_seoul_news_list(driver):
    """Return all elements on the current page with class ``newspaper``."""
    return driver.find_elements_by_class_name("newspaper")
class SeoulNews:
    """One news entry read out of a page element.

    The constructor pulls the link ``href`` from the first ``a`` tag and the
    text of the child elements with classes ``media``, ``date``, ``title``,
    ``category`` and ``manifesto``.  The manifesto text must consist of
    exactly two newline-separated parts, stored as ``man_num`` and
    ``man_text``; any other shape raises ``ValueError``.  ``month`` is stored
    unchanged.
    """

    def __init__(self, webelem, month):
        def text_of(class_name):
            # Text content of the child element carrying ``class_name``.
            return webelem.find_element_by_class_name(class_name).text

        link = webelem.find_element_by_tag_name("a")
        self.url = link.get_attribute("href")
        self.media = text_of("media")
        self.date = text_of("date")
        self.title = text_of("title")
        self.category = text_of("category")
        # Two-way unpacking is deliberate: it raises ValueError before any
        # manifesto attribute is set when the text is not exactly two parts.
        number, text = text_of("manifesto").split("\n")
        self.man_num = number
        self.man_text = text
        self.month = month

    def __str__(self):
        """Return the entry's title."""
        return self.title

    def export_dict(self):
        """Return the instance's attribute dictionary (not a copy)."""
        return vars(self)
def writer_csv(filename, fieldnames):
    """Create ``filename`` and return a ``csv.DictWriter`` for it.

    The file is opened for writing as UTF-8 (truncating any existing file)
    and the header row built from ``fieldnames`` is written before the
    writer is returned.

    The underlying file handle is not returned, so the caller cannot close
    it explicitly; it stays open for as long as the writer is referenced.
    """
    # newline="" hands line-ending control to the csv module.  Without it,
    # platforms that translate "\n" on write (Windows) turn the writer's
    # "\r\n" row terminator into "\r\r\n", producing blank rows.
    f = open(filename, "w", encoding="utf-8", newline="")
    try:
        wtr = csv.DictWriter(f, fieldnames=fieldnames)
        wtr.writeheader()
    except Exception:
        # Do not leak the handle when the header cannot be written.
        f.close()
        raise
    return wtr
def main(num):
    """Scrape ``num`` consecutive pages of the news site into ``seoulnews.csv``.

    For each page this collects the ``newspaper`` elements and the active
    month label, writes one CSV row per element that parses into a
    ``SeoulNews``, and prints the text of any element that does not.  It then
    clicks the "previous" control and waits one second before reading the
    next page.

    The browser session is always shut down and the CSV file always closed,
    even when scraping stops early because of an error or Ctrl-C.
    """
    PATH = "/Users/todo/chromedriver"
    URL = "http://mayor.seoul.go.kr/app/seoul/news.do"
    fields = ["man_num", "man_text", "month", "title", "media", "date", "category", "url"]

    driver = run_driver(PATH)
    try:
        get_site(driver, URL)
        # The file is opened here (rather than through writer_csv, which does
        # not expose its handle) so it can be closed deterministically.
        # newline="" is what the csv module requires of its output files.
        with open("seoulnews.csv", "w", encoding="utf-8", newline="") as f:
            wtr = csv.DictWriter(f, fieldnames=fields)
            wtr.writeheader()
            for _ in range(num):
                news_list = get_seoul_news_list(driver)
                month = get_current_month(driver)
                for news_elem in news_list:
                    try:
                        sn = SeoulNews(news_elem, month)
                        wtr.writerow(sn.export_dict())
                    except Exception:
                        # Best effort: report the entry that could not be
                        # parsed or written and continue with the next one.
                        # KeyboardInterrupt/SystemExit are no longer swallowed.
                        print(news_elem.text)
                click_prev(driver)
                # Pause before the next page is read.
                sleep(1)
    finally:
        # End the WebDriver session so no browser process is left behind.
        driver.quit()
if __name__ == "__main__":
    # Script entry point: scrape 55 consecutive pages.
    main(55)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment