Skip to content

Instantly share code, notes, and snippets.

@Beomi
Created February 26, 2019 07:29
Show Gist options
  • Save Beomi/3d950041d7c66212763953be59832382 to your computer and use it in GitHub Desktop.
네이버 지식인 목록 불러오기
import requests
from bs4 import BeautifulSoup as bs
def get_naver_kin_urls(query, max_page=100, timeout=10):
    """Collect Naver KiN (지식iN) result URLs for *query* from mobile search.

    Fetches up to ``max_page`` result pages (15 results per page) from
    ``m.search.naver.com`` and extracts the href of each result-title anchor.

    Args:
        query: Search keyword to look up.
        max_page: Maximum number of result pages to fetch (default 100).
        timeout: Per-request timeout in seconds; prevents a stalled server
            from hanging the scraper indefinitely.

    Returns:
        list[str]: Result URLs in page order. May be empty if nothing matched
        or the page markup changed.
    """
    # Browser-like headers so the mobile search endpoint serves normal HTML.
    headers = {
        'authority': 'm.search.naver.com',
        'pragma': 'no-cache',
        'cache-control': 'no-cache',
        'upgrade-insecure-requests': '1',
        'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36',
        'dnt': '1',
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'referer': 'https://m.search.naver.com/search.naver',
        'accept-encoding': 'gzip, deflate, br',
        'accept-language': 'ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7',
    }
    urls = []
    # range(max_page) fetches exactly max_page pages; the original
    # `while page <= max_page` off-by-one fetched max_page + 1 pages.
    for page in range(max_page):
        params = (
            ('where', 'm_kin'),
            ('query', query),
            # Naver's `start` is 1-based with 15 results per page.
            ('start', page * 15 + 1),
            ('display', '15'),
            ('sm', 'mtb_pge'),
            ('kin_sort', '0'),
            ('kin_tag', '0'),
            ('m_answer', '0'),
            ('nso', 'so:r,a:all,p:all'),
        )
        response = requests.get(
            'https://m.search.naver.com/search.naver',
            headers=headers,
            params=params,
            timeout=timeout,
        )
        soup = bs(response.text, 'html.parser')
        # Result titles are rendered as <a class="elss total_tit" href=...>.
        urls += [anchor['href'] for anchor in soup.select('a.elss.total_tit')]
    return urls
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment