Skip to content

Instantly share code, notes, and snippets.

@kurzweil777
Last active July 3, 2020 16:31
Show Gist options
  • Save kurzweil777/987321d6caec5abe5bb75ce15e014b57 to your computer and use it in GitHub Desktop.
Save kurzweil777/987321d6caec5abe5bb75ce15e014b57 to your computer and use it in GitHub Desktop.
Parsers
import requests
from bs4 import BeautifulSoup
import pprint
"""This Parser returns Title, Link and a price. Additional functions will be included in a future"""
# OLX listing page that parse() downloads.
URL = "https://www.olx.ua/elektronika/kompyutery-i-komplektuyuschie/"

# Product tokens of the browser-like User-Agent, joined with single spaces.
_USER_AGENT = " ".join((
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
    "AppleWebKit/537.36 (KHTML, like Gecko)",
    "Chrome/81.0.4044.138",
    "Safari/537.36",
    "OPR/68.0.3618.191",
))

# HTTP headers that get_html() attaches to every request.
HEADERS = {
    'user-agent': _USER_AGENT,
    'accept': '*/*',
}
def get_html(url, params="page=30", timeout=10):
    """Send a GET request for *url* and return the response.

    Args:
        url: Address to request.
        params: Query parameters forwarded to ``requests.get``; defaults to
            the query string ``"page=30"``.
        timeout: Seconds to wait for the server before ``requests`` gives up
            and raises a timeout exception. Without a timeout a stalled
            server would block the script indefinitely.

    Returns:
        The ``requests.Response`` for the request. The status code is not
        checked here; callers inspect it themselves.
    """
    return requests.get(url, headers=HEADERS, params=params, timeout=timeout)
def get_content(html):
    """Extract the offers from a listing page, pretty-print and return them.

    Every ``<div class="offer-wrapper">`` in *html* becomes a dict with the
    keys ``"Title"``, ``"Link"``, ``"Price"`` and ``"City"``. A field whose
    tag is missing from an offer is stored as ``None`` instead of aborting
    the whole parse. A dashed separator string is appended after each offer
    so the printed output is easier to read.

    Args:
        html: Markup of the listing page as text.

    Returns:
        The list that was printed: offer dicts interleaved with separator
        strings.
    """
    soup = BeautifulSoup(html, 'html.parser')
    computers = []
    for item in soup.find_all('div', class_='offer-wrapper'):
        title = item.find("strong")
        link = item.find("a", "detailsLink")
        price = item.find("p", "price")
        # NOTE(review): an empty string is passed as the class filter here;
        # presumably this is meant to select the location span -- confirm
        # against the page markup.
        city = item.find("span", "")
        computers.append({
            "Title": title.text if title is not None else None,
            "Link": link.get("href") if link is not None else None,
            "Price": price.getText(strip=True) if price is not None else None,
            "City": city.getText() if city is not None else None,
        })
        computers.append("-------------------------------------------------------------------------")
    pprint.pprint(computers)
    return computers
def parse():
    """Download the page at ``URL`` and print the offers found on it.

    The offers are printed by ``get_content`` when the server answers with
    HTTP 200. If the request itself fails or any other status code comes
    back, ``"Something gone wrong"`` is printed instead.
    """
    try:
        response = get_html(URL)
    except requests.RequestException:
        # Connection-level failures are reported through the same message as
        # a bad HTTP status rather than surfacing as a traceback.
        response = None

    if response is not None and response.status_code == 200:
        get_content(response.text)
    else:
        print("Something gone wrong")


# Run the parser only when the file is executed as a script.
if __name__ == "__main__":
    parse()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment