Skip to content

Instantly share code, notes, and snippets.

@blzzua
Created June 21, 2021 17:43
Show Gist options
  • Save blzzua/fe3310413ac717ad88a8926efb5733fe to your computer and use it in GitHub Desktop.
Save blzzua/fe3310413ac717ad88a8926efb5733fe to your computer and use it in GitHub Desktop.
londonwinecompetition.py
import pandas as pd
import requests
from bs4 import BeautifulSoup
def get_html(url, timeout=30):
    """Download *url* and return the response body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The decoded body of the HTTP response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    r = requests.get(url=url, timeout=timeout)
    # Fail loudly on an error status instead of handing an error page
    # to the parser as if it were a results page.
    r.raise_for_status()
    return r.text
def parse_html(html):
    """Extract one record per wine from a results page.

    The page is searched for a ``div.isotopeGrid`` container; every
    ``article.isotopeGridItem`` inside it becomes one dict. The text of the
    ``<h1>`` inside the article's ``div.txt`` is stored under
    ``'Wine name'``. Each ``<li>`` of the article's ``<ul>`` contributes one
    entry: the ``<strong>`` text (with leading/trailing colons stripped) is
    the key, and the node following the ``<strong>`` (stringified and
    whitespace-stripped) is the value.

    Args:
        html: HTML markup of the page.

    Returns:
        A list of dicts, one per wine article. The list is empty when the
        page contains no results grid.
    """
    soup = BeautifulSoup(html, 'lxml')
    grid = soup.find('div', class_='isotopeGrid')
    if grid is None:
        # find() returns None when nothing matches; no grid, no wines.
        return []

    wines = []
    for article in grid.find_all('article', class_='isotopeGridItem'):
        title_box = article.find('div', class_='txt')
        heading = title_box.h1 if title_box is not None else None
        # Keep the article even when its heading is missing so the
        # remaining attributes are not lost.
        record = {'Wine name': heading.text if heading is not None else None}

        items = article.ul.find_all('li') if article.ul is not None else []
        for item in items:
            label = item.strong
            if label is None:
                # An <li> without a <strong> label has no key to store under.
                continue
            key = label.text
            value = label.next_sibling
            if key and value:
                record[key.strip(':')] = str(value).strip()
        wines.append(record)
    return wines
if __name__ == '__main__':
    # Rows from every competition year are pooled into a single table.
    table = []
    years = (2018, 2019, 2020, 2021)
    for year in years:
        url = f'https://londonwinecompetition.com/en/competition-global-results/{year}/'
        print(f'download {url}')
        html = get_html(url)
        print(f'parsing {year}...')
        table.extend(parse_html(html))

    # Build one DataFrame from all rows and write it out as CSV.
    df = pd.DataFrame(table)
    df.to_csv('wines.csv')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment