hannesdatta · September 14, 2023 13:42
diff --git a/scraper.py b/scraper.py
 # FINAL CODE
 import requests
 from bs4 import BeautifulSoup

 # Define the URL and the browser-like User-Agent header sent with the request
 url = 'https://www.coolblue.nl/tweedekans-product/2191236'
 headers = {
     'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
                   '(KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
 }

 # Send an HTTP GET request to the URL with the specified headers.
 # The timeout keeps the script from waiting forever on an unresponsive
 # server, and raise_for_status() turns an HTTP error response (4xx/5xx) into
 # an explicit exception instead of letting an error page be parsed below.
 response = requests.get(url, headers=headers, timeout=10)
 response.raise_for_status()
 response.encoding = response.apparent_encoding

 # Extract the HTML source code from the response
 source_code = response.text

 # Parse the HTML using BeautifulSoup
 soup = BeautifulSoup(source_code, 'html.parser')


 def _text_of(element, description):
     """Return the whitespace-stripped text of *element*.

     Args:
         element: The result of a ``soup.find(...)`` call, or None.
         description: Human-readable name of the field, used in the error.

     Raises:
         LookupError: If *element* is None, i.e. the lookup matched nothing
             (for example because the page layout changed).
     """
     if element is None:
         raise LookupError(
             'Could not find {} on {}'.format(description, url)
         )
     return element.get_text().strip()


 # Extract product information
 product_name = _text_of(soup.find(class_='js-product-name'), 'product name')
 former_price = _text_of(
     soup.find(class_='sales-price__former-price'), 'former price'
 )
 current_price = _text_of(
     soup.find(class_='sales-price__current js-sales-price-current'),
     'current price',
 )
 reviews = _text_of(
     soup.find(class_='review-rating__reviews text--truncate'), 'reviews'
 )
 current_state = _text_of(
     soup.find('li', class_='inline-list__item js-inline-list-item'),
     'current state',
 )

 # Extract additional information from the status list
 status = soup.find('ul', attrs={'class': 'list list--bullet'})
 if status is None:
     raise LookupError('Could not find status list on {}'.format(url))
 status_items = status.find_all('li')

 # Collect the stripped text of every <li> in the status list
 status_info = [state.get_text().strip() for state in status_items]

 # Print the extracted information
 print("Status Information:")
 for item in status_info:
     print(item)

 print("Product Name:", product_name)
 print("Former Price:", former_price)
 print("Current Price:", current_price)
 # Fixed-width slices of the review text; presumably the first 7 characters
 # hold the score and the following 13 the review count -- TODO confirm
 # against the live page.
 print("Reviews:", reviews[0:7].strip(), reviews[7:20].strip())
 print("Current State:", current_state)
diff --git a/wrong_code.py b/wrong_code.py
 # ORIGINAL CODE RECEIVED BY STUDENT FOR DEBUGGING
 import requests
 from bs4 import BeautifulSoup

 # Buggy exercise script: downloads one product page and prints a few fields.
 # The code is kept exactly as handed out; NOTE(review) comments mark the
 # problems visible in it.

 # Target product page and the User-agent header sent with the request.
 url = 'https://www.coolblue.nl/tweedekans-product/2191236'
 header= {'User-agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'}
 # Download the page, then decode the body with the encoding requests detects.
 # NOTE(review): no timeout is passed and the HTTP status is never checked.
 phone_request = requests.get(url, headers = header)
 phone_request.encoding = phone_request.apparent_encoding
 phone_source_code = phone_request.text

 # NOTE(review): no parser argument is passed, so BeautifulSoup has to choose
 # one itself -- presumably one of the intended debugging findings.
 soup = BeautifulSoup(phone_source_code)

 # Look up each field by CSS class and keep its stripped text.
 # NOTE(review): each find() result is used without checking for None.
 product_name = soup.find(class_='js-product-name').get_text().strip()
 former_price = soup.find(class_='sales-price__former-price').get_text().strip()
 current_price = soup.find(class_='sales-price__current js-sales-price-current').get_text().strip()
 reviews = soup.find(class_='review-rating__reviews text--truncate').get_text().strip()
 current_state = soup.find('li', class_='inline-list__item js-inline-list-item').get_text().strip()



 # Collect the <li> entries of the bulleted status list.
 status = soup.find('ul',attrs={'class':'list list--bullet'})
 status_phone = status.find_all('li')

 out = []

 # NOTE(review): the loop variable `state` is never used; get_text() is called
 # on `status_phone` (the whole find_all result) on every iteration instead of
 # on the individual <li>. The intended call looks like state.get_text().
 for state in status_phone: out.append(status_phone.get_text())
 # NOTE(review): bare expression; it is evaluated and discarded, nothing is
 # printed when this runs as a script.
 out

 # Print the extracted fields, one per line.
 print(product_name)
 print(former_price)
 print(current_price)
 # Fixed-width slices of the review text; presumably score first, then the
 # review count -- TODO confirm against the live page.
 print(reviews[0:7].strip())
 print(reviews[7:20].strip())
 print(current_state)
	# FINAL CODE
	import requests
	from bs4 import BeautifulSoup

	# Define the URL and the browser-like User-Agent header sent with the request
	url = 'https://www.coolblue.nl/tweedekans-product/2191236'
	headers = {
		'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
		'(KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
	}

	# Send an HTTP GET request to the URL with the specified headers.
	# The timeout keeps the script from waiting forever on an unresponsive
	# server, and raise_for_status() turns an HTTP error response (4xx/5xx) into
	# an explicit exception instead of letting an error page be parsed below.
	response = requests.get(url, headers=headers, timeout=10)
	response.raise_for_status()
	response.encoding = response.apparent_encoding

	# Extract the HTML source code from the response
	source_code = response.text

	# Parse the HTML using BeautifulSoup
	soup = BeautifulSoup(source_code, 'html.parser')


	def _text_of(element, description):
		"""Return the whitespace-stripped text of *element*.

		Args:
			element: The result of a ``soup.find(...)`` call, or None.
			description: Human-readable name of the field, used in the error.

		Raises:
			LookupError: If *element* is None, i.e. the lookup matched nothing
				(for example because the page layout changed).
		"""
		if element is None:
			raise LookupError(
				'Could not find {} on {}'.format(description, url)
			)
		return element.get_text().strip()


	# Extract product information
	product_name = _text_of(soup.find(class_='js-product-name'), 'product name')
	former_price = _text_of(
		soup.find(class_='sales-price__former-price'), 'former price'
	)
	current_price = _text_of(
		soup.find(class_='sales-price__current js-sales-price-current'),
		'current price',
	)
	reviews = _text_of(
		soup.find(class_='review-rating__reviews text--truncate'), 'reviews'
	)
	current_state = _text_of(
		soup.find('li', class_='inline-list__item js-inline-list-item'),
		'current state',
	)

	# Extract additional information from the status list
	status = soup.find('ul', attrs={'class': 'list list--bullet'})
	if status is None:
		raise LookupError('Could not find status list on {}'.format(url))
	status_items = status.find_all('li')

	# Collect the stripped text of every <li> in the status list
	status_info = [state.get_text().strip() for state in status_items]

	# Print the extracted information
	print("Status Information:")
	for item in status_info:
		print(item)

	print("Product Name:", product_name)
	print("Former Price:", former_price)
	print("Current Price:", current_price)
	# Fixed-width slices of the review text; presumably the first 7 characters
	# hold the score and the following 13 the review count -- TODO confirm
	# against the live page.
	print("Reviews:", reviews[0:7].strip(), reviews[7:20].strip())
	print("Current State:", current_state)
	# ORIGINAL CODE RECEIVED BY STUDENT FOR DEBUGGING
	import requests
	from bs4 import BeautifulSoup

	# Buggy exercise script: downloads one product page and prints a few fields.
	# The code is kept exactly as handed out; NOTE(review) comments mark the
	# problems visible in it.

	# Target product page and the User-agent header sent with the request.
	url = 'https://www.coolblue.nl/tweedekans-product/2191236'
	header= {'User-agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'}
	# Download the page, then decode the body with the encoding requests detects.
	# NOTE(review): no timeout is passed and the HTTP status is never checked.
	phone_request = requests.get(url, headers = header)
	phone_request.encoding = phone_request.apparent_encoding
	phone_source_code = phone_request.text

	# NOTE(review): no parser argument is passed, so BeautifulSoup has to choose
	# one itself -- presumably one of the intended debugging findings.
	soup = BeautifulSoup(phone_source_code)

	# Look up each field by CSS class and keep its stripped text.
	# NOTE(review): each find() result is used without checking for None.
	product_name = soup.find(class_='js-product-name').get_text().strip()
	former_price = soup.find(class_='sales-price__former-price').get_text().strip()
	current_price = soup.find(class_='sales-price__current js-sales-price-current').get_text().strip()
	reviews = soup.find(class_='review-rating__reviews text--truncate').get_text().strip()
	current_state = soup.find('li', class_='inline-list__item js-inline-list-item').get_text().strip()



	# Collect the <li> entries of the bulleted status list.
	status = soup.find('ul',attrs={'class':'list list--bullet'})
	status_phone = status.find_all('li')

	out = []

	# NOTE(review): the loop variable `state` is never used; get_text() is called
	# on `status_phone` (the whole find_all result) on every iteration instead of
	# on the individual <li>. The intended call looks like state.get_text().
	for state in status_phone: out.append(status_phone.get_text())
	# NOTE(review): bare expression; it is evaluated and discarded, nothing is
	# printed when this runs as a script.
	out

	# Print the extracted fields, one per line.
	print(product_name)
	print(former_price)
	print(current_price)
	# Fixed-width slices of the review text; presumably score first, then the
	# review count -- TODO confirm against the live page.
	print(reviews[0:7].strip())
	print(reviews[7:20].strip())
	print(current_state)