Skip to content

Instantly share code, notes, and snippets.

@allieus
Last active November 7, 2017 00:51
Show Gist options
  • Save allieus/8ee93da2e3cef6dd1ef165b1781d3ac4 to your computer and use it in GitHub Desktop.
Save allieus/8ee93da2e3cef6dd1ef165b1781d3ac4 to your computer and use it in GitHub Desktop.
장고걸스 2017년 11월 세미나 - Azure Functions을 활용한 파이썬 크롤링 스케줄링
import os
import json
import time
import requests
from bs4 import BeautifulSoup
def get_realtime_keywords(timeout=10):
    """Scrape the real-time search keywords from the Naver front page.

    Downloads ``https://www.naver.com/``, parses it with ``html.parser`` and
    collects the text of every element matching the CSS selector
    ``.PM_CL_realtimeKeyword_rolling_base .ah_k``.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block forever, which would hang a
            scheduled job.

    Returns:
        A list of strings, one per matched element. The list is empty when
        the page contains no matching element.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with a 4xx/5xx status.
    """
    response = requests.get('https://www.naver.com/', timeout=timeout)
    # Fail loudly on an error response instead of parsing an error page and
    # silently returning an empty keyword list.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    tags = soup.select('.PM_CL_realtimeKeyword_rolling_base .ah_k')
    return [tag.text for tag in tags]
def insert(partition_key, row_key, **kwargs):
    """Write a single table entity as JSON to the file named by ``tablePath``.

    The entity is a flat JSON object holding ``PartitionKey``, ``RowKey`` and
    every extra keyword argument as an additional property. The destination
    path is read from the ``tablePath`` environment variable and the file is
    overwritten on each call.

    NOTE(review): presumably ``tablePath`` is supplied by an Azure Functions
    table output binding, which turns the file into a table row; confirm
    against the function's binding configuration.

    Args:
        partition_key: Value for the entity's ``PartitionKey``.
        row_key: Value for the entity's ``RowKey``.
        **kwargs: Additional JSON-serialisable properties of the entity.

    Raises:
        KeyError: If the ``tablePath`` environment variable is not set.
        TypeError: If a property value is not JSON-serialisable, or if
            ``PartitionKey``/``RowKey`` is also passed as a keyword argument.
    """
    doc = dict(
        # Azure Table storage defines both keys as strings, while callers may
        # pass numbers (e.g. a Unix timestamp), so normalise them here.
        PartitionKey=str(partition_key),
        RowKey=str(row_key),
        **kwargs)
    table_path = os.environ['tablePath']
    with open(table_path, 'wt', encoding='utf8') as f:
        json.dump(doc, f)
if __name__ == '__main__':
    # Scrape the current keywords first ...
    keywords = get_realtime_keywords()
    # ... then add them to the Azure Table, using the current Unix time
    # (whole seconds) as the row key.
    insert(
        partition_key='naver_realtime_keywords',
        row_key=int(time.time()),
        keywords=keywords,
    )
beautifulsoup4
requests
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment