Last active
July 3, 2020 16:31
-
-
Save kurzweil777/987321d6caec5abe5bb75ce15e014b57 to your computer and use it in GitHub Desktop.
Parsers
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import requests | |
| from bs4 import BeautifulSoup | |
| import pprint | |
| """This parser returns the title, link, price and city of each offer. Additional functions will be added in the future.""" | |
# Listing page that parse() downloads.
URL = "https://www.olx.ua/elektronika/kompyutery-i-komplektuyuschie/"
# HTTP headers sent with every request made by get_html(). The user-agent
# string imitates a desktop browser (presumably so the site serves its
# regular HTML page -- not verified here).
HEADERS = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/81.0.4044.138 Safari/537.36 OPR/68.0.3618.191', 'accept': '*/*'}
def get_html(url, params="page=30", timeout=10):
    """Send a GET request to *url* using the module-level HEADERS.

    Args:
        url: Address to request.
        params: Query parameters forwarded unchanged to ``requests.get``
            (a ready-made query string or a mapping). Defaults to
            ``"page=30"``.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` waits indefinitely, so a stalled
            connection would hang the whole script.

    Returns:
        The ``requests.Response`` object. The status code is not checked
        here; the caller decides what to do with it.

    Raises:
        requests.exceptions.RequestException: On connection errors or when
            the timeout expires.
    """
    return requests.get(url, headers=HEADERS, params=params, timeout=timeout)
def _text_or_none(tag, strip=False):
    """Return the text of a BeautifulSoup *tag*, or None when the tag is missing."""
    if tag is None:
        return None
    return tag.get_text(strip=strip)


def get_content(html):
    """Extract the offers from a listing page and pretty-print them.

    Every ``<div class="offer-wrapper">`` element yields one dict with the
    keys ``"Title"``, ``"Link"``, ``"Price"`` and ``"City"``, followed by a
    dashed separator string that visually splits the records in the
    printed output.

    A field whose element is absent from an offer is stored as ``None``
    instead of raising, so a single incomplete offer no longer aborts the
    whole run.

    Args:
        html: HTML source of the listing page as a string.

    Returns:
        The list that was printed (dicts interleaved with separator
        strings).
    """
    soup = BeautifulSoup(html, 'html.parser')
    computers = []
    for item in soup.find_all('div', class_='offer-wrapper'):
        # find() returns None when nothing matches, so look each element up
        # first and only dereference it after checking.
        link = item.find("a", "detailsLink")
        computers.append({
            "Title": _text_or_none(item.find("strong")),
            "Link": link.get("href") if link is not None else None,
            "Price": _text_or_none(item.find("p", "price"), strip=True),
            # NOTE(review): the empty class filter is kept exactly as in the
            # original lookup; confirm it selects the city element on the
            # live page markup.
            "City": _text_or_none(item.find("span", "")),
        })
        computers.append("-------------------------------------------------------------------------")
    pprint.pprint(computers)
    return computers
def parse():
    """Download the listing page at URL and print the offers it contains.

    When the server answers with any status other than 200, a short error
    message is printed instead and nothing is parsed.
    """
    response = get_html(URL)
    if response.status_code != 200:
        print("Something gone wrong")
        return
    get_content(response.text)
# Run the scraper only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    parse()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment