This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
# Assemble the scraped columns into a single DataFrame (one row per book)
# and print a quick preview plus the inferred column dtypes.
col_dict = {'title': titles, 'price': prices, 'rating': ratings}
book_store = pd.DataFrame(col_dict)
print(book_store.head())
print(book_store.dtypes)

# Convert the price text to float. Everything except digits and the decimal
# point is stripped, instead of slicing off a fixed two-character prefix, so
# the result does not depend on how many characters the currency prefix
# occupies (presumably that varies with how the page was decoded -- verify).
book_store['price'] = (
    book_store['price']
    .str.replace(r'[^0-9.]', '', regex=True)
    .astype(float)
)

# Lookup from spelled-out rating words to their integer value.
number_mapping = {'One': 1, 'Two': 2, 'Three': 3, 'Four': 4, 'Five': 5}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Each entry of book_list is indexed as (titles, prices, ratings) for one
# scraped page; flatten the per-page sequences into three flat lists.
# The first list is named `titles` (plural) because that is the name the
# DataFrame construction reads; `title` would leave it undefined.
titles = [bk for page_result in book_list for bk in page_result[0]]
prices = [price for page_result in book_list for price in page_result[1]]
ratings = [star for page_result in book_list for star in page_result[2]]
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
from bs4 import BeautifulSoup | |
import time | |
import random | |
import numpy as np | |
from multiprocessing import Pool | |
# Filled in place by generate_urls() with one URL per catalogue page.
url_list = []
# Page numbers 1 through 50 inclusive (np.arange excludes the stop value).
pages = np.arange(1, 51, 1)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Fan the page URLs out over 10 worker processes. map() blocks until every
# URL has been processed and returns the results in url_list order.
# NOTE(review): on platforms that start workers by spawning a fresh
# interpreter, this must run under `if __name__ == '__main__':` -- confirm
# how this script is launched.
p = Pool(10)
try:
    book_list = p.map(scrape_url, url_list)
finally:
    # Always tear the workers down, even when a worker raised and map()
    # re-raised that exception here.
    p.terminate()
    p.join()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Populate url_list, then print its first ten entries as a sanity check.
generate_urls()
for preview_url in url_list[:10]:
    print(preview_url)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def scrape_url(url): | |
book_title = [] | |
star_rating = [] | |
product_price = [] | |
time.sleep(random.randint(1,10)) | |
results = requests.get(url) | |
soup = BeautifulSoup(results.text, 'html.parser') | |
book_div = soup.find_all('li', class_='col-xs-6 col-sm-4 col-md-3 col-lg-3') |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def generate_urls():
    """Append one catalogue page URL per entry of ``pages`` to ``url_list``.

    Reads the module-level ``pages`` sequence and mutates the module-level
    ``url_list`` in place, preserving the order of ``pages``. Returns
    ``None``. Calling it again appends the same URLs a second time.
    """
    url_list.extend(
        'http://books.toscrape.com/catalogue/page-' + str(page) + '.html'
        for page in pages
    )
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
from bs4 import BeautifulSoup | |
import time | |
import random | |
import numpy as np | |
from multiprocessing import Pool | |
# Filled in place by generate_urls() with one URL per catalogue page.
url_list = []
# Page numbers 1 through 50 inclusive (np.arange excludes the stop value).
pages = np.arange(1, 51, 1)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
from requests import get | |
from bs4 import BeautifulSoup | |
import numpy as np | |
import time | |
import random | |
# Page numbers 1 through 50 inclusive (np.arange excludes the stop value).
pages = np.arange(1, 51, 1)
# Starts empty; presumably filled with scraped titles by code that is not
# part of this snippet -- verify against the full script.
book_title = []
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Write the DataFrame to book_store.csv in the current working directory.
# NOTE(review): with default arguments the row index is written as an extra
# leading column; pass index=False if that column is not wanted -- confirm
# what downstream readers of the file expect.
book_store.to_csv('book_store.csv')
NewerOlder