Skip to content

Instantly share code, notes, and snippets.

@ashutoshkrris
Last active April 16, 2022 08:56
Show Gist options
  • Save ashutoshkrris/0c9993aaf36247a7c23ff29b48ae8d36 to your computer and use it in GitHub Desktop.
Save ashutoshkrris/0c9993aaf36247a7c23ff29b48ae8d36 to your computer and use it in GitHub Desktop.
import csv
import json
from bs4 import BeautifulSoup
from selenium import webdriver
# Shared Chrome WebDriver instance, created as soon as this module is loaded.
# NOTE(review): "chromedriver.exe" is a relative path, so the driver binary
# presumably has to sit in the current working directory — confirm.
BROWSER = webdriver.Chrome(executable_path="chromedriver.exe")
# Number of leaderboard entries to keep; also embedded in the output file names.
TOTAL_PERSONS = 100
def _cell_texts(soup, css_class, start=0):
    """Return the stripped text of up to TOTAL_PERSONS matching ``div`` cells.

    Args:
        soup: Parsed page to search.
        css_class: Value passed to ``find_all(..., class_=...)``.
        start: Index of the first matching cell to keep. Callers pass 1 for
            columns whose first match is skipped (presumably a header cell —
            verify against the page markup).

    Returns:
        A list of at most TOTAL_PERSONS strings.
    """
    cells = soup.find_all('div', class_=css_class)
    return [cell.get_text().strip()
            for cell in cells[start:start + TOTAL_PERSONS]]


def data_scraper():
    """Load the Bloomberg billionaires page and extract the leaderboard columns.

    The page is fetched with the module-level ``BROWSER``; the driver is shut
    down afterwards even if loading the page fails, so no browser/driver
    process is left behind.

    Returns:
        A dict with the keys ``ranks``, ``names``, ``links``, ``worths``,
        ``last_changes``, ``ytds``, ``countries`` and ``industries``. Each
        value is a list of strings holding at most TOTAL_PERSONS entries,
        aligned by position.
    """
    try:
        BROWSER.get("https://www.bloomberg.com/billionaires/")
        html_source = BROWSER.page_source
    finally:
        # quit() ends the whole driver session, not just the current window.
        BROWSER.quit()

    soup = BeautifulSoup(html_source, 'html.parser')

    # Names and profile links come from the same cells. Only the first
    # TOTAL_PERSONS cells are inspected, so a cell without an anchor further
    # down the page cannot break the scrape.
    name_cells = soup.find_all(
        'div', class_='table-cell t-name')[:TOTAL_PERSONS]
    names = [cell.get_text().strip() for cell in name_cells]
    links = [cell.find('a')['href'].replace("./", "") for cell in name_cells]

    return {
        "ranks": _cell_texts(soup, 'table-cell t-rank'),
        "names": names,
        "links": links,
        # The remaining columns skip their first match (start=1).
        "worths": _cell_texts(soup, 'table-cell active t-nw', start=1),
        "last_changes": _cell_texts(soup, 't-lcd', start=1),
        "ytds": _cell_texts(soup, 't-ycd', start=1),
        "countries": _cell_texts(soup, 'table-cell t-country', start=1),
        "industries": _cell_texts(soup, 'table-cell t-industry', start=1),
    }
def write_to_csv(data: dict) -> None:
    """Write the scraped leaderboard to ``top-<TOTAL_PERSONS>-persons.csv``.

    Args:
        data: Mapping with the list-valued keys ``ranks``, ``names``,
            ``links``, ``worths``, ``last_changes``, ``ytds``, ``countries``
            and ``industries``, aligned by position.

    At most TOTAL_PERSONS rows are written. If any column is shorter than
    that, the output stops at the shortest column instead of raising
    ``IndexError`` halfway through the file.
    """
    columns = ['Rank', 'Name', 'Link',
               'Total net worth($)', '$ Last change', '$ YTD change',
               'Country/Region', 'Industry']
    keys = ("ranks", "names", "links", "worths",
            "last_changes", "ytds", "countries", "industries")
    row_count = min(TOTAL_PERSONS, *(len(data[key]) for key in keys))

    # Explicit UTF-8 so names with non-ASCII characters do not depend on the
    # platform's default encoding.
    with open(f"top-{TOTAL_PERSONS}-persons.csv", "w", newline="",
              encoding="utf-8") as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=columns)
        writer.writeheader()
        for i in range(row_count):
            writer.writerow({
                "Rank": data["ranks"][i],
                "Name": data["names"][i],
                "Link": f"https://www.bloomberg.com/billionaires/{data['links'][i]}",
                "Total net worth($)": data["worths"][i],
                "$ Last change": data["last_changes"][i],
                "$ YTD change": data["ytds"][i],
                "Country/Region": data["countries"][i],
                "Industry": data["industries"][i],
            })
def write_to_json(data: dict) -> None:
    """Write the scraped leaderboard to ``top-<TOTAL_PERSONS>-persons.json``.

    Args:
        data: Mapping with the list-valued keys ``ranks``, ``names``,
            ``links``, ``worths``, ``last_changes``, ``ytds``, ``countries``
            and ``industries``, aligned by position.

    The file holds a JSON array with one object per person. At most
    TOTAL_PERSONS objects are written; if any column is shorter than that,
    the array stops at the shortest column instead of raising ``IndexError``.
    """
    keys = ("ranks", "names", "links", "worths",
            "last_changes", "ytds", "countries", "industries")
    row_count = min(TOTAL_PERSONS, *(len(data[key]) for key in keys))

    data_list = [
        {
            "Rank": data["ranks"][i],
            "Name": data["names"][i],
            "Link": f"https://www.bloomberg.com/billionaires/{data['links'][i]}",
            "Total net worth($)": data["worths"][i],
            "$ Last change": data["last_changes"][i],
            "$ YTD change": data["ytds"][i],
            "Country/Region": data["countries"][i],
            "Industry": data["industries"][i],
        }
        for i in range(row_count)
    ]

    with open(f"top-{TOTAL_PERSONS}-persons.json", "w",
              encoding="utf-8") as json_file:
        json.dump(data_list, json_file)
if __name__ == '__main__':
    # Scrape once, then hand the same result to each exporter in turn
    # (CSV first, JSON second).
    data = data_scraper()
    for export in (write_to_csv, write_to_json):
        export(data)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment