Skip to content

Instantly share code, notes, and snippets.

@phwelo
Last active March 18, 2020 01:30
Show Gist options
  • Save phwelo/e049bf47bcbb3e58d16522a85dfd5c44 to your computer and use it in GitHub Desktop.
Save phwelo/e049bf47bcbb3e58d16522a85dfd5c44 to your computer and use it in GitHub Desktop.
parse coronavirus data
#!/usr/bin/env python3
from bs4 import BeautifulSoup
import urllib.request, urllib.error, urllib.parse
# Published Google Sheet (HTML export) that this script scrapes; presumably a
# COVID-19 case tracker — TODO confirm the sheet is still live and has the same layout.
source_url = 'https://docs.google.com/spreadsheets/u/0/d/e/2PACX-1vR30F8lYP3jG7YOq8es0PBpJIE5yvRVZffOyaqC0GgMBN6yt0Q-NI8pxS7hd1F9dYXnowSC6zpZmW9D/pubhtml/sheet?gid=0'
def get_table_rows(url):
    """Fetch *url* and return every ``<tr>`` element of its 'waffle' table.

    Args:
        url: Address of an HTML page containing a ``<table class="waffle">``
            (the class Google Sheets uses for its published-HTML tables).

    Returns:
        A list of BeautifulSoup ``<tr>`` tag objects.
    """
    # `with` ensures the HTTP response is closed even on error
    # (the original leaked the connection).
    with urllib.request.urlopen(url) as response:
        html = response.read()
    web_content = BeautifulSoup(html, features="lxml")
    parsed_table = web_content.body.find('table', attrs={'class': 'waffle'})
    return parsed_table.find_all('tr')
def parse_rows(row_array):
    """Convert spreadsheet ``<tr>`` rows into per-country stat dicts.

    Args:
        row_array: Sequence of row objects exposing ``find_all('td')``,
            where each cell has a ``.text`` attribute (BeautifulSoup tags).

    Returns:
        List of dicts with keys ``'country'``, ``'cases'`` and ``'deaths'``
        (all values are the raw cell text, i.e. strings).
    """
    parsed_rows = []
    # The sheet's data starts at row 7; earlier rows are header/padding.
    # BUG FIX: iterate the row_array argument, not the module-level
    # `table_rows` global the original accidentally closed over.
    for row in row_array[7:]:
        all_td = row.find_all('td')
        current_row = {'country': all_td[0].text}
        # A row containing 'Queue' marks the end of the data section.
        if 'Queue' in current_row['country']:
            break
        current_row['cases'] = all_td[1].text
        current_row['deaths'] = all_td[2].text
        parsed_rows.append(current_row)
    return parsed_rows
# Script entry point: fetch the published sheet, parse it, print the result.
# Guarded so importing this module performs no network I/O; the assignments
# stay module-global when run as a script, preserving prior behavior.
if __name__ == "__main__":
    table_rows = get_table_rows(source_url)
    final_obj = parse_rows(table_rows)
    print(final_obj)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment