@Denniskamau
Created April 14, 2020 14:23
startScrapping method
...
def startScrapping(self, items, book_data):
    # Requires module-level imports (elided above): os, urllib.request,
    # and BeautifulSoup from bs4.
    # Get the current working directory
    current_directory = os.getcwd()
    # Create a folder named "books" to store the scraped cover images
    path = os.path.join(current_directory, "books")
    self.createDirectory(path)
    counter = 1
    # Loop through the product list
    for book in book_data:
        try:
            # Each listing item links to a product detail page
            product = book.find('div', class_="product")
            url = product.find('a')
            full_url = url.get('href')
            page = urllib.request.urlopen('https://textbookcentre.com' + full_url)
            soup = BeautifulSoup(page, 'html.parser')
            data = soup.find('article', class_='product_page')
            # Locate the cover image link on the product page
            image = data.find('div', id='product-images')
            image = image.find('a')
            image_url = image.get('href')
            # Get the title of the book so as to save each image under its title
            title_data = data.find('div', class_='col-sm-6 product_main')
            title = title_data.find('h1')
            fullpath = os.path.join(path, '{}.jpg'.format(title.text))
            # Save the cover image
            urllib.request.urlretrieve('https://textbookcentre.com' + image_url, fullpath)
            if counter == items:
                print('INFO: finished')
                return counter
            else:
                print('INFO: saved {} {}'.format(title.text, counter))
                counter += 1
        except Exception as e:
            print('ERROR:', e)
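For context, a minimal calling sketch follows. It assumes the elided class above (called BookScraper here, a made-up name) defines startScrapping and a createDirectory helper, and that book_data is an iterable of listing elements that each wrap a <div class="product"> linking to a product detail page; the inline listing snippet and its selectors are illustrative, not taken from the gist.

from bs4 import BeautifulSoup

# Hypothetical listing snippet; a real run would fetch the listing page with urllib.request.
listing_html = """
<li><div class="product"><a href="/catalogue/some-book/">Some Book</a></div></li>
<li><div class="product"><a href="/catalogue/another-book/">Another Book</a></div></li>
"""
listing_soup = BeautifulSoup(listing_html, 'html.parser')
# startScrapping expects elements that each contain a <div class="product">
# whose link points at the product detail page.
book_data = listing_soup.find_all('li')

scraper = BookScraper()  # hypothetical class that defines startScrapping/createDirectory
saved = scraper.startScrapping(items=2, book_data=book_data)
print('INFO: downloaded {} cover images'.format(saved))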