Last active
January 25, 2019 06:43
-
-
Save imtaehyun/4dd1a4dea3f9518c71b71036ea944f1b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
from bs4 import BeautifulSoup | |
from pprint import pprint | |
import json | |
import time | |
def get_list():
    """Scrape the diaper category listing from wemakeprice.com.

    Site navigation path:
    Main -> Food/Living/Baby & Kids -> Baby & Kids/Maternity -> Diapers.

    Each parsed item is also printed to stdout.

    Returns:
        A list of ``(page_type, title, price, link)`` tuples, where
        ``page_type`` is ``'adeal'``, ``'deal'``, ``'product'`` or ``None``
        (derived from substrings of the link), ``price`` has its thousands
        separators removed, and ``link`` is an absolute URL. The list is
        empty when the server does not answer with HTTP 200.

    Note:
        The tuple order differs from ``get_list_from_wonder_deliver``,
        which returns ``(page_type, title, link, price)``.
    """
    # TODO: handle infinite scroll (only a single GET is issued)
    from urllib.parse import urljoin

    list_url = 'http://www.wemakeprice.com/main/100700/100708'

    # The context manager releases the session's pooled connections.
    with requests.Session() as session:
        # Without a timeout requests may block forever on a stalled server.
        response = session.get(list_url, timeout=10)
        response.encoding = 'utf-8'
        if response.status_code != 200:
            return []
        html = response.text

    soup = BeautifulSoup(html, 'html.parser')
    result = []
    for item in soup.select('.content-main > .group_list .type03'):
        title = item.select('.tit_desc')[0].string
        price = item.select('.sale')[0].contents[0].replace(',', '')
        # urljoin leaves absolute http/https links alone and correctly
        # resolves root-relative and protocol-relative ("//host/...") hrefs,
        # which a plain "prefix unless it contains 'https'" check mangles.
        link = urljoin(list_url, item.select('a')[0].attrs['href'])

        # 'adeal' must be tested before 'deal' because it contains it.
        page_type = next(
            (kind for kind in ('adeal', 'deal', 'product') if kind in link),
            None,
        )

        print(page_type, title, price, link)
        result.append((page_type, title, price, link))
    return result
def get_list_from_wonder_deliver():
    """Scrape the diaper listing of the "Wonder Delivery" section.

    Site navigation path: Main -> Wonder Delivery -> Diapers.

    Returns:
        A list of ``(page_type, title, link, price)`` tuples, where
        ``page_type`` is ``'adeal'``, ``'deal'``, ``'product'`` or ``None``
        (derived from substrings of the link), ``link`` is the anchor's
        ``href`` exactly as found in the page, and ``price`` has its
        thousands separators removed. The list is empty when the server
        does not answer with HTTP 200.

    Note:
        The tuple order differs from ``get_list``, which returns
        ``(page_type, title, price, link)``.
    """
    # The context manager releases the session's pooled connections.
    with requests.Session() as session:
        # Without a timeout requests may block forever on a stalled server.
        response = session.get(
            'https://front.wemakeprice.com/wonder/division/6000333',
            timeout=10,
        )
        response.encoding = 'utf-8'
        if response.status_code != 200:
            return []
        html = response.text

    soup = BeautifulSoup(html, 'html.parser')
    result = []
    for anchor in soup.select('div.box_imagedeal a'):
        link = anchor['href']
        title = anchor.select('div.item_cont div.option_txt p.text')[0].string
        price = anchor.select(
            'div.item_cont div.option_txt div.price_info strong em'
        )[0].string.replace(',', '')

        # 'adeal' must be tested before 'deal' because it contains it.
        page_type = next(
            (kind for kind in ('adeal', 'deal', 'product') if kind in link),
            None,
        )

        result.append((page_type, title, link, price))
    return result
def get_page(deal_id='600073392'):
    """Print the products listed on a deal page by parsing its HTML.

    Prints the number of ``#_productInfo li`` entries first, then one line
    per entry with its title, price (thousands separators removed) and
    image URL. Nothing is printed when the server does not answer with
    HTTP 200.

    Args:
        deal_id: Identifier appended to
            ``https://front.wemakeprice.com/deal/``. Defaults to the deal
            that was previously hard-coded, so ``get_page()`` behaves as
            before.
    """
    # The context manager releases the session's pooled connections.
    with requests.Session() as session:
        # Without a timeout requests may block forever on a stalled server.
        response = session.get(
            'https://front.wemakeprice.com/deal/{}'.format(deal_id),
            timeout=10,
        )
        response.encoding = 'utf-8'
        if response.status_code != 200:
            return
        html = response.text

    soup = BeautifulSoup(html, 'html.parser')
    # Run the selector once and reuse the result for the count and the loop.
    entries = soup.select('#_productInfo li')
    print(len(entries))
    for entry in entries:
        title = entry.select('p.text')[0].string
        price = entry.select('.price .sale .num')[0].string.replace(',', '')
        img = entry.select('.item_img a img')[0].attrs['src']
        print(title, price, img)
def get_page_deal_type(deal_id):
    """Print the discounted products of a "deal"-type page.

    The page embeds its data as a ``JSON.parse('...')`` call inside a
    ``<script>`` element; that JSON text is cut out and decoded, and every
    product whose ``discountYn`` equals ``'Y'`` is printed. Nothing is
    printed when the server does not answer with HTTP 200.

    Args:
        deal_id: Identifier appended to
            ``https://front.wemakeprice.com/deal/``.
    """
    # The context manager releases the session's pooled connections.
    with requests.Session() as session:
        # Without a timeout requests may block forever on a stalled server.
        response = session.get(
            'https://front.wemakeprice.com/deal/{}'.format(deal_id),
            timeout=10,
        )
        response.encoding = 'utf-8'
        if response.status_code != 200:
            return
        html = response.text

    soup = BeautifulSoup(html, 'html.parser')
    # NOTE(review): the payload is taken from the third <script> element by
    # position; this will break if the page layout changes.
    script = soup.find_all('script')[2].string
    # Keep the text between "JSON.parse('" and the closing "'));".
    payload = script.split('JSON.parse(\'', 1)[1].split('\'));')[0]
    deal = json.loads(payload)

    # Other fields previously read from the payload but never used:
    #   deal:    dealNo (deal number), dealNm (deal name),
    #            dcateNm (brand name)
    #   product: salePrice, discountRate, discountPrice, originPriceYn,
    #            mainImg.largeImg.{imgUrl, imgHeight, imgWidth}
    for product in deal['prodSimpleList']:
        if product['discountYn'] == 'Y':
            print('discount: ', product['prodNo'], product['prodNm'])
def get_page_adeal_type(deal_id):
    """Print the options of an "adeal"-type page via its JSON endpoint.

    Fetches ``/c/deal_option/get_option_info/<deal_id>``, prints the
    ``main_name`` of the result set, then prints one line per option with
    its id, value and price. Nothing is printed when the server does not
    answer with HTTP 200, matching the other fetchers in this file.

    Args:
        deal_id: Identifier appended to the option-info endpoint URL.
    """
    # Without a timeout requests may block forever on a stalled server.
    response = requests.get(
        'http://www.wemakeprice.com/c/deal_option/get_option_info/{}'.format(
            deal_id),
        timeout=10,
    )
    if response.status_code != 200:
        return

    result = response.json()['result_set']
    print(result['main_name'])
    for option in result['option_info']['list']:
        value = option['value']
        print(value['option_id'], value['option_value'], value['option_price'])
# for page in get_list_from_wonder_deliver():
#     if page[0] == 'deal':
#         deal_id = page[2].replace('//front.wemakeprice.com/deal/', '')
#         print(page[1], deal_id)
#         get_page_deal_type(deal_id)
#         time.sleep(1)
# get_page_deal_type()
# get_page_deal_type('600046043')
# Summary
# product -> product name / image / link / price
# deal -> get_page_deal_type()
# adeal -> get_page_adeal_type()
# get_page_adeal_type('4218897')
# get_list()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment