Created
November 30, 2018 07:49
-
-
Save imtaehyun/aab809aebe0579b4442ec5d8609a318e to your computer and use it in GitHub Desktop.
중고나라 크롤링
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
from bs4 import BeautifulSoup | |
def get_recent_article(menuid, page=1):
    """Fetch one page of a Joonggonara board listing and print its articles.

    Requests the ``ArticleList.nhn`` page of the Naver Cafe "joonggonara"
    (club id 10050146), parses the HTML and prints the id, title, link and
    date of every table row that carries all four fields.

    Args:
        menuid: Board identifier, sent as the ``search.menuid`` query
            parameter.
        page: Board page number, sent as ``search.page``. Defaults to 1.

    Returns:
        A list of ``(articleid, title, link, date)`` string tuples, one per
        printed row, in page order. ``articleid`` is the post identifier
        shown in the row's number cell.

    Raises:
        requests.RequestException: If the request fails or times out.
        IndexError: If the page holds fewer than two ``div.article-board``
            elements.
    """
    url = f'https://cafe.naver.com/joonggonara/ArticleList.nhn?search.clubid=10050146&search.menuid={menuid}&search.boardtype=L&search.page={page}&userDisplay=50'
    # The session serves a single request; the context manager releases its
    # pooled connections, and the timeout keeps a stalled server from
    # blocking the caller forever.
    with requests.Session() as s:
        response = s.get(url, timeout=10)

    soup = BeautifulSoup(response.text, 'html.parser')
    # The listing is read from the second ``div.article-board`` element.
    # NOTE(review): presumably the first one holds pinned notices - confirm.
    board = soup.select('div.article-board')[1]

    articles = []
    for row in board.select('tr'):
        number = row.select_one('.board-number .inner_number')
        anchor = row.select_one('a.article')
        date_cell = row.select_one('td.td_date')
        # Rows missing any of the expected cells are not article rows;
        # skip them explicitly rather than swallowing every exception.
        if number is None or anchor is None or date_cell is None:
            continue
        link = anchor.get('href')
        if link is None:
            continue

        articleid = number.text.strip()
        title = anchor.text.strip()
        date = date_cell.text.strip()
        print(articleid, title, link, date)
        articles.append((articleid, title, link, date))
    return articles
def get_article_content(menuid, articleid):
    """Fetch a single Joonggonara article page and return its raw HTML.

    Requests the ``ArticleRead.nhn`` page of the Naver Cafe "joonggonara"
    (club id 10050146) for the given board and article.

    Args:
        menuid: Board identifier, sent as the ``menuid`` query parameter.
        articleid: Post identifier, sent as the ``articleid`` query
            parameter.

    Returns:
        The response body decoded as text. No parsing is performed.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    url = f'https://cafe.naver.com/joonggonara/ArticleRead.nhn?clubid=10050146&page=1&menuid={menuid}&boardtype=L&articleid={articleid}&referrerAllArticles=false'
    # Close the one-shot session when done and bound the wait so a stalled
    # server cannot hang the caller.
    with requests.Session() as s:
        response = s.get(url, timeout=10)
    # The fetched page used to be discarded; hand it back to the caller.
    return response.text
# Script entry: print the first listing page of board 338.
get_recent_article('338')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment